Go to the documentation of this file. Go to the SVN repository for this file. 1 /* $Id: blast_options.c 98921 2023-01-24 15:56:52Z boratyng $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 */
26
27 /** @file blast_options.c
28 * The structures and functions in blast_options.[ch] should be used to specify
29 * user preferences. The options structures should not be changed by the BLAST code
30 * but rather be read to determine user preferences. When possible these structures
31 * should be passed in as "const".
32 *
33 */
34
35 #include
36 #include
37 #include
38 #include
39 #include
40 #include
41 #include
42
/** PSSM scaling factor of 1.0 (presumably meaning "no IMPALA-style scaling",
 *  judging by the name only -- confirm against the header). */
const double kPSSM_NoImpalaScaling = 1.0;

/* Default DUST parameters.  Declared in blast_def.h as extern const.
 * SDustOptionsNew() copies these into the level / window / linker
 * fields of a newly created SDustOptions. */
const int kDustLevel  = 20;
const int kDustWindow = 64;
const int kDustLinker = 1;
49
50 SDustOptions* SDustOptionsFree(SDustOptions* dust_options)
51 {
52 if (dust_options)
53 sfree(dust_options);
54 return NULL;
55 }
56
57 Int2 SDustOptionsNew(SDustOptions* *dust_options)
58 {
59 if (dust_options == NULL)
60 return 1;
61
62 *dust_options = (SDustOptions*) malloc(sizeof(SDustOptions));
63 (*dust_options)->level = kDustLevel;
64 (*dust_options)->window = kDustWindow;
65 (*dust_options)->linker = kDustLinker;
66
67 return 0;
68 }
69
70 SSegOptions* SSegOptionsFree(SSegOptions* seg_options)
71 {
72 if (seg_options)
73 sfree(seg_options);
74 return NULL;
75 }
76
77 Int2 SSegOptionsNew(SSegOptions* *seg_options)
78 {
79 if (seg_options == NULL)
80 return 1;
81
82 *seg_options = (SSegOptions*) malloc(sizeof(SSegOptions));
83 (*seg_options)->window = kSegWindow;
84 (*seg_options)->locut = kSegLocut;
85 (*seg_options)->hicut = kSegHicut;
86
87 return 0;
88 }
89
90 Int2 SWindowMaskerOptionsNew(SWindowMaskerOptions ** winmask_options)
91 {
92 if (winmask_options) {
93 *winmask_options = (SWindowMaskerOptions*) calloc(1, sizeof(SWindowMaskerOptions));
94 if (*winmask_options == NULL)
95 return BLASTERR_MEMORY;
96
97 (*winmask_options)->taxid = 0;
98 (*winmask_options)->database = NULL;
99 return 0;
100 }
101 return 1;
102 }
103
104 SWindowMaskerOptions* SWindowMaskerOptionsFree(SWindowMaskerOptions* winmask_options)
105 {
106 if (winmask_options)
107 {
108 if (winmask_options->database)
109 {
110 sfree(winmask_options->database);
111 }
112 sfree(winmask_options);
113 }
114 return NULL;
115 }
116
117 SRepeatFilterOptions* SRepeatFilterOptionsFree(SRepeatFilterOptions* repeat_options)
118 {
119 if (repeat_options)
120 {
121 sfree(repeat_options->database);
122 sfree(repeat_options);
123 }
124 return NULL;
125 }
126
127 Int2 SRepeatFilterOptionsNew(SRepeatFilterOptions* *repeat_options)
128 {
129
130 if (repeat_options == NULL)
131 return 1;
132
133 *repeat_options = (SRepeatFilterOptions*) calloc(1, sizeof(SRepeatFilterOptions));
134 if (*repeat_options == NULL)
135 return BLASTERR_MEMORY;
136
137 (*repeat_options)->database = strdup(kDefaultRepeatFilterDb);
138
139 return 0;
140 }
141
142 Int2 SRepeatFilterOptionsResetDB(SRepeatFilterOptions* *repeat_options, const char* db)
143 {
144 Int2 status=0;
145
146 if (*repeat_options == NULL)
147 status = SRepeatFilterOptionsNew(repeat_options);
148
149 if (status)
150 return status;
151
152 sfree((*repeat_options)->database);
153 (*repeat_options)->database = strdup(db);
154
155 return status;
156 }
157
158 Int2 SWindowMaskerOptionsResetDB(SWindowMaskerOptions ** winmask_options, const char* db)
159 {
160 Int2 status=0;
161
162 if (*winmask_options == NULL)
163 status = SWindowMaskerOptionsNew(winmask_options);
164
165 if (status)
166 return status;
167
168 sfree((*winmask_options)->database);
169
170 if (db) {
171 (*winmask_options)->database = strdup(db);
172 }
173
174 return status;
175 }
176
177 SReadQualityOptions* SReadQualityOptionsFree(
178 SReadQualityOptions* read_quality_options)
179 {
180 if (read_quality_options) {
181 free(read_quality_options);
182 }
183
184 return NULL;
185 }
186
187 Int2 SReadQualityOptionsNew(SReadQualityOptions** read_quality_options)
188 {
189 if (!read_quality_options) {
190 return 1;
191 }
192
193 *read_quality_options = calloc(1, sizeof(SReadQualityOptions));
194 if (!*read_quality_options) {
195 return 1;
196 }
197
198 (*read_quality_options)->frac_ambig = 0.5;
199 (*read_quality_options)->entropy = 16;
200
201 return 0;
202 }
203
204 SBlastFilterOptions* SBlastFilterOptionsFree(SBlastFilterOptions* filter_options)
205 {
206 if (filter_options)
207 {
208 filter_options->dustOptions =
209 SDustOptionsFree(filter_options->dustOptions);
210 filter_options->segOptions =
211 SSegOptionsFree(filter_options->segOptions);
212 filter_options->repeatFilterOptions =
213 SRepeatFilterOptionsFree(filter_options->repeatFilterOptions);
214 filter_options->windowMaskerOptions =
215 SWindowMaskerOptionsFree(filter_options->windowMaskerOptions);
216 filter_options->readQualityOptions =
217 SReadQualityOptionsFree(filter_options->readQualityOptions);
218 sfree(filter_options);
219 }
220
221 return NULL;
222 }
223
224 Int2 SBlastFilterOptionsNew(SBlastFilterOptions* *filter_options, EFilterOptions type)
225 {
226 Int2 status = 0;
227
228 if (filter_options)
229 {
230 *filter_options = (SBlastFilterOptions*) calloc(1, sizeof(SBlastFilterOptions));
231 (*filter_options)->mask_at_hash = FALSE;
232 if (type == eSeg)
233 SSegOptionsNew(&((*filter_options)->segOptions));
234 if (type == eDust || type == eDustRepeats)
235 SDustOptionsNew(&((*filter_options)->dustOptions));
236 if (type == eRepeats || type == eDustRepeats)
237 SRepeatFilterOptionsNew(&((*filter_options)->repeatFilterOptions));
238 }
239 else
240 status = 1;
241
242 return status;
243 }
244
245
246 /** Merges together two sets of dust options, choosing the most non-default one.
247 *
248 * @param opt1 first set to be merged [in]
249 * @param opt2 second set to be merged [in]
250 * @return the merged options.
251 */
252 static SDustOptions* s_MergeDustOptions(const SDustOptions* opt1, const SDustOptions* opt2)
253 {
254 SDustOptions* retval = NULL;
255
256 if (!opt1 && !opt2)
257 return NULL;
258
259 SDustOptionsNew(&retval);
260
261 if (opt1 && !opt2)
262 {
263 retval->level = opt1->level;
264 retval->window = opt1->window;
265 retval->linker = opt1->linker;
266 }
267 else if (!opt1 && opt2)
268 {
269 retval->level = opt2->level;
270 retval->window = opt2->window;
271 retval->linker = opt2->linker;
272 }
273 else
274 {
275 retval->level = (opt1->level != kDustLevel) ? opt1->level : opt2->level;
276 retval->window = (opt1->window != kDustWindow) ? opt1->window : opt2->window;
277 retval->linker = (opt1->linker != kDustLinker) ? opt1->linker : opt2->linker;
278 }
279
280 return retval;
281 }
282
283
284 /** Merges together two sets of SEG options, choosing the most non-default one.
285 *
286 * @param opt1 first set to be merged [in]
287 * @param opt2 second set to be merged [in]
288 * @return the merged options.
289 */
290 static SSegOptions* s_MergeSegOptions(const SSegOptions* opt1, const SSegOptions* opt2)
291 {
292 SSegOptions* retval = NULL;
293
294 if (!opt1 && !opt2)
295 return NULL;
296
297 SSegOptionsNew(&retval);
298
299 if (opt1 && !opt2)
300 {
301 retval->window = opt1->window;
302 retval->locut = opt1->locut;
303 retval->hicut = opt1->hicut;
304 }
305 else if (!opt1 && opt2)
306 {
307 retval->window = opt2->window;
308 retval->locut = opt2->locut;
309 retval->hicut = opt2->hicut;
310 }
311 else
312 {
313 retval->window = (opt1->window != kSegWindow) ? opt1->window : opt2->window;
314 retval->locut = (opt1->locut != kSegLocut) ? opt1->locut : opt2->locut;
315 retval->hicut = (opt1->hicut != kSegHicut) ? opt1->hicut : opt2->hicut;
316 }
317 return retval;
318 }
319
320 /** Merges together two sets of repeat filter options, choosing the most non-default one.
321 *
322 * @param opt1 first set to be merged [in]
323 * @param opt2 second set to be merged [in]
324 * @return the merged options.
325 */
326 static SRepeatFilterOptions* s_MergeRepeatOptions(const SRepeatFilterOptions* opt1, const SRepeatFilterOptions* opt2)
327 {
328 SRepeatFilterOptions* retval = NULL;
329
330 if (!opt1 && !opt2)
331 return NULL;
332
333 SRepeatFilterOptionsNew(&retval);
334
335 if (opt1 && !opt2)
336 {
337 SRepeatFilterOptionsResetDB(&retval, opt1->database);
338 }
339 else if (!opt1 && opt2)
340 {
341 SRepeatFilterOptionsResetDB(&retval, opt2->database);
342 }
343 else
344 { /* TODO : handle different db's. */
345 SRepeatFilterOptionsResetDB(&retval, opt2->database);
346 }
347 return retval;
348 }
349
350 /** Merges together two sets of window masker options, choosing the most non-default one.
351 *
352 * @param opt1 first set to be merged [in]
353 * @param opt2 second set to be merged [in]
354 * @return the merged options.
355 */
356 static SWindowMaskerOptions*
357 s_MergeWindowMaskerOptions(const SWindowMaskerOptions* opt1,
358 const SWindowMaskerOptions* opt2)
359 {
360 SWindowMaskerOptions* retval = NULL;
361 const SWindowMaskerOptions* src = NULL;
362 Boolean have1 = FALSE, have2 = FALSE;
363
364 have1 = opt1 && (opt1->database || opt1->taxid);
365 have2 = opt2 && (opt2->database || opt2->taxid);
366
367 if (! (have1 || have2))
368 return NULL;
369
370 if (have1 && ! have2) {
371 src = opt1;
372 } else if (! have1 && have2) {
373 src = opt2;
374 } else {
375 // We have data structures with some kind of content, so
376 // prefer structure 2 as repeat filter options do.
377 src = opt2;
378 }
379
380 ASSERT(src);
381 ASSERT(src->database || src->taxid);
382
383 SWindowMaskerOptionsNew(&retval);
384 SWindowMaskerOptionsResetDB(& retval, src->database);
385 retval->taxid = src->taxid;
386
387 return retval;
388 }
389
390 Int2 SBlastFilterOptionsMerge(SBlastFilterOptions** combined, const SBlastFilterOptions* opt1,
391 const SBlastFilterOptions* opt2)
392 {
393 SBlastFilterOptions* retval = NULL;
394 Int2 status = 0;
395
396 *combined = NULL;
397
398 if (opt1 == NULL && opt2 == NULL)
399 return 0;
400
401 status = SBlastFilterOptionsNew(&retval, eEmpty);
402 if (status != 0)
403 return status;
404
405 *combined = retval;
406
407 if ((opt1 && opt1->mask_at_hash) || (opt2 && opt2->mask_at_hash))
408 retval->mask_at_hash = TRUE;
409
410 retval->dustOptions =
411 s_MergeDustOptions(opt1 ? opt1->dustOptions : NULL, opt2 ? opt2->dustOptions : NULL);
412 retval->segOptions =
413 s_MergeSegOptions(opt1 ? opt1->segOptions : NULL, opt2 ? opt2->segOptions : NULL);
414 retval->repeatFilterOptions =
415 s_MergeRepeatOptions(opt1 ? opt1->repeatFilterOptions : NULL, opt2 ? opt2->repeatFilterOptions : NULL);
416 retval->windowMaskerOptions =
417 s_MergeWindowMaskerOptions(opt1 ? opt1->windowMaskerOptions : NULL, opt2 ? opt2->windowMaskerOptions : NULL);
418
419 return 0;
420 }
421
422 Boolean SBlastFilterOptionsNoFiltering(const SBlastFilterOptions* filter_options)
423 {
424 if (filter_options == NULL)
425 return TRUE;
426
427 return filter_options->dustOptions == NULL &&
428 filter_options->segOptions == NULL &&
429 filter_options->repeatFilterOptions == NULL &&
430 filter_options->windowMaskerOptions == NULL;
431 }
432
433 Boolean SBlastFilterOptionsMaskAtHash(const SBlastFilterOptions* filter_options)
434 {
435 if (filter_options == NULL)
436 return FALSE;
437
438 return filter_options->mask_at_hash;
439 }
440
441 Int2 SBlastFilterOptionsValidate(EBlastProgramType program_number, const SBlastFilterOptions* filter_options, Blast_Message* *blast_message)
442 {
443 Int2 status = 0;
444
445 if (filter_options == NULL)
446 {
447 Blast_MessageWrite(blast_message, eBlastSevWarning, kBlastMessageNoContext,
448 "SBlastFilterOptionsValidate: NULL filter_options");
449 return BLASTERR_INVALIDPARAM;
450 }
451
452 if (filter_options->repeatFilterOptions)
453 {
454 if (program_number != eBlastTypeBlastn &&
455 program_number != eBlastTypeMapping)
456 {
457 if (blast_message)
458 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
459 "SBlastFilterOptionsValidate: Repeat filtering only supported with blastn");
460 return BLASTERR_OPTION_PROGRAM_INVALID;
461 }
462 if (filter_options->repeatFilterOptions->database == NULL ||
463 strlen(filter_options->repeatFilterOptions->database) == 0)
464 {
465 if (blast_message)
466 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
467 "SBlastFilterOptionsValidate: No repeat database specified for repeat filtering");
468 return BLASTERR_INVALIDPARAM;
469 }
470 }
471
472 if (filter_options->dustOptions)
473 {
474 if (program_number != eBlastTypeBlastn &&
475 program_number != eBlastTypeMapping)
476 {
477 if (blast_message)
478 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
479 "SBlastFilterOptionsValidate: Dust filtering only supported with blastn");
480 return BLASTERR_OPTION_PROGRAM_INVALID;
481 }
482 }
483
484 if (filter_options->segOptions)
485 {
486 if (program_number == eBlastTypeBlastn &&
487 program_number != eBlastTypeMapping)
488 {
489 if (blast_message)
490 Blast_MessageWrite(blast_message, eBlastSevError, kBlastMessageNoContext,
491 "SBlastFilterOptionsValidate: SEG filtering is not supported with blastn");
492 return BLASTERR_OPTION_PROGRAM_INVALID;
493 }
494 }
495
496 return status;
497 }
498
499
500 QuerySetUpOptions*
501 BlastQuerySetUpOptionsFree(QuerySetUpOptions* options)
502
503 {
504 if (options)
505 {
506 sfree(options->filter_string);
507 options->filtering_options = SBlastFilterOptionsFree(options->filtering_options);
508 sfree(options);
509 }
510 return NULL;
511 }
512
513 Int2
514 BlastQuerySetUpOptionsNew(QuerySetUpOptions* *options)
515 {
516 Int2 status = 0;
517
518 if (options == NULL)
519 return BLASTERR_INVALIDPARAM;
520
521 *options = (QuerySetUpOptions*) calloc(1, sizeof(QuerySetUpOptions));
522
523 if (*options == NULL)
524 return BLASTERR_MEMORY;
525
526 (*options)->genetic_code = BLAST_GENETIC_CODE;
527
528 /** @todo the code below should be deprecated */
529 status = SBlastFilterOptionsNew(&((*options)->filtering_options), eEmpty);
530
531 return status;
532 }
533
534 Int2 BLAST_FillQuerySetUpOptions(QuerySetUpOptions* options,
535 EBlastProgramType program, const char *filter_string, Uint1 strand_option)
536 {
537 Int2 status = 0;
538
539 if (options == NULL)
540 return BLASTERR_INVALIDPARAM;
541
542 if (strand_option &&
543 (program == eBlastTypeBlastn || program == eBlastTypePhiBlastn ||
544 program == eBlastTypeBlastx || program == eBlastTypeTblastx ||
545 program == eBlastTypeMapping)) {
546 options->strand_option = strand_option;
547 }
548
549 if (filter_string) {
550 /* Free whatever filter string has been set before. */
551 sfree(options->filter_string);
552 /* Free whatever filtering options have been set. */
553 options->filtering_options = SBlastFilterOptionsFree(options->filtering_options);
554 /* Parse the filter_string for options, do not save the string. */
555 status = BlastFilteringOptionsFromString(program, filter_string,
556 &options->filtering_options, NULL);
557 }
558 return status;
559 }
560
561 BlastInitialWordOptions*
562 BlastInitialWordOptionsFree(BlastInitialWordOptions* options)
563
564 {
565
566 sfree(options);
567
568 return NULL;
569 }
570
571
572 Int2
573 BlastInitialWordOptionsNew(EBlastProgramType program,
574 BlastInitialWordOptions* *options)
575 {
576 *options =
577 (BlastInitialWordOptions*) calloc(1, sizeof(BlastInitialWordOptions));
578 if (*options == NULL)
579 return BLASTERR_MEMORY;
580
581 if (/*program != eBlastTypeBlastn &&
582 program != eBlastTypePhiBlastn */
583 !Blast_ProgramIsNucleotide(program)) { /* protein-protein options. */
584 (*options)->window_size = BLAST_WINDOW_SIZE_PROT;
585 (*options)->x_dropoff = BLAST_UNGAPPED_X_DROPOFF_PROT;
586 (*options)->gap_trigger = BLAST_GAP_TRIGGER_PROT;
587 } else {
588 (*options)->window_size = BLAST_WINDOW_SIZE_NUCL;
589 (*options)->scan_range = BLAST_SCAN_RANGE_NUCL;
590 (*options)->gap_trigger = BLAST_GAP_TRIGGER_NUCL;
591 (*options)->x_dropoff = BLAST_UNGAPPED_X_DROPOFF_NUCL;
592 }
593
594 (*options)->program_number = program;
595
596 return 0;
597 }
598
599
600 Int2
601 BlastInitialWordOptionsValidate(EBlastProgramType program_number,
602 const BlastInitialWordOptions* options,
603 Blast_Message* *blast_msg)
604 {
605
606 ASSERT(options);
607
608 /* PHI-BLAST has no ungapped extension phase. Megablast may not have it,
609 but generally does now. */
610 if (program_number != eBlastTypeBlastn &&
611 program_number != eBlastTypeMapping &&
612 (!Blast_ProgramIsPhiBlast(program_number)) &&
613 options->x_dropoff scan_range && !options->window_size)
622 {
623 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
624 "off_diagonal_range is only useful in 2-hit algorithm");
625 return BLASTERR_OPTION_VALUE_INVALID;
626 }
627
628
629 return 0;
630 }
631
632
633 Int2
634 BLAST_FillInitialWordOptions(BlastInitialWordOptions* options,
635 EBlastProgramType program, Int4 window_size,
636 double xdrop_ungapped)
637 {
638 if (!options)
639 return BLASTERR_INVALIDPARAM;
640
641 if (window_size != 0)
642 options->window_size = window_size;
643 if (xdrop_ungapped != 0)
644 options->x_dropoff = xdrop_ungapped;
645
646 return 0;
647 }
648
649 BlastExtensionOptions*
650 BlastExtensionOptionsFree(BlastExtensionOptions* options)
651
652 {
653
654 sfree(options);
655
656 return NULL;
657 }
658
659 Int2
660 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions* *options, Boolean gapped)
661
662 {
663 *options = (BlastExtensionOptions*)
664 calloc(1, sizeof(BlastExtensionOptions));
665
666 if (*options == NULL)
667 return BLASTERR_MEMORY;
668
669 if (/* program != eBlastTypeBlastn &&
670 program != eBlastTypePhiBlastn*/
671 !Blast_ProgramIsNucleotide(program)) /* protein-protein options. */
672 {
673 (*options)->gap_x_dropoff = BLAST_GAP_X_DROPOFF_PROT;
674 (*options)->gap_x_dropoff_final =
675 BLAST_GAP_X_DROPOFF_FINAL_PROT;
676 } else {
677 (*options)->gap_x_dropoff = BLAST_GAP_X_DROPOFF_NUCL;
678 (*options)->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
679 }
680
681 (*options)->ePrelimGapExt = eDynProgScoreOnly;
682 (*options)->eTbackExt = eDynProgTbck;
683 (*options)->compositionBasedStats = eNoCompositionBasedStats;
684
685 /** @todo how to determine this for PSI-BLAST bootstrap run (i.e. when
686 * program is blastp? */
687 if (gapped && (Blast_QueryIsPssm(program) && ! Blast_SubjectIsTranslated(program))) {
688 (*options)->compositionBasedStats = eCompositionBasedStats;
689 }
690
691 (*options)->max_mismatches = 5;
692 (*options)->mismatch_window = 10;
693 (*options)->program_number = program;
694
695 return 0;
696 }
697
698 Int2
699 BLAST_FillExtensionOptions(BlastExtensionOptions* options,
700 EBlastProgramType program, Int4 greedy, double x_dropoff,
701 double x_dropoff_final)
702 {
703 if (!options)
704 return BLASTERR_INVALIDPARAM;
705
706 if (/*program == eBlastTypeBlastn || program == eBlastTypePhiBlastn*/
707 Blast_ProgramIsNucleotide(program)) {
708 if (greedy) {
709 options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_GREEDY;
710 options->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
711 options->ePrelimGapExt = eGreedyScoreOnly;
712 options->eTbackExt = eGreedyTbck;
713 } else {
714 options->gap_x_dropoff = BLAST_GAP_X_DROPOFF_NUCL;
715 options->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
716 options->ePrelimGapExt = eDynProgScoreOnly;
717 options->eTbackExt = eDynProgTbck;
718 }
719 }
720
721 if (Blast_QueryIsPssm(program) && ! Blast_SubjectIsTranslated(program)) {
722 options->compositionBasedStats = eCompositionBasedStats;
723 }
724
725 if (x_dropoff)
726 options->gap_x_dropoff = x_dropoff;
727 if (x_dropoff_final) {
728 options->gap_x_dropoff_final = x_dropoff_final;
729 } else {
730 /* Final X-dropoff can't be smaller than preliminary X-dropoff */
731 options->gap_x_dropoff_final =
732 MAX(options->gap_x_dropoff_final, x_dropoff);
733 }
734
735 return 0;
736
737 }
738
739 Int2
740 BlastExtensionOptionsValidate(EBlastProgramType program_number,
741 const BlastExtensionOptions* options, Blast_Message* *blast_msg)
742
743 {
744 if (options == NULL)
745 return BLASTERR_INVALIDPARAM;
746
747 if (program_number != eBlastTypeBlastn &&
748 program_number != eBlastTypeMapping &&
749 (options->ePrelimGapExt == eGreedyScoreOnly ||
750 options->eTbackExt == eGreedyTbck))
751 {
752 Blast_MessageWrite(blast_msg, eBlastSevWarning,
753 kBlastMessageNoContext,
754 "Greedy extension only supported for BLASTN");
755 return BLASTERR_OPTION_PROGRAM_INVALID;
756 }
757
758 if ((options->ePrelimGapExt == eSmithWatermanScoreOnly &&
759 options->eTbackExt != eSmithWatermanTbckFull) ||
760 (options->ePrelimGapExt != eSmithWatermanScoreOnly &&
761 options->eTbackExt == eSmithWatermanTbckFull))
762 {
763 Blast_MessageWrite(blast_msg, eBlastSevWarning,
764 kBlastMessageNoContext,
765 "Score-only and traceback Smith-Waterman must "
766 "both be specified");
767 return BLASTERR_OPTION_VALUE_INVALID;
768 }
769
770 return 0;
771 }
772
773 BlastScoringOptions*
774 BlastScoringOptionsFree(BlastScoringOptions* options)
775
776 {
777 if (options == NULL)
778 return NULL;
779
780 sfree(options->matrix);
781 sfree(options->matrix_path);
782 sfree(options);
783
784 return NULL;
785 }
786
787 Int2
788 BlastScoringOptionsNew(EBlastProgramType program_number, BlastScoringOptions* *options)
789 {
790 *options = (BlastScoringOptions*) calloc(1, sizeof(BlastScoringOptions));
791
792 if (*options == NULL)
793 return BLASTERR_INVALIDPARAM;
794
795 if (/*program_number != eBlastTypeBlastn &&
796 program_number != eBlastTypePhiBlastn*/
797 !Blast_ProgramIsNucleotide(program_number)) {/*protein-protein options.*/
798 (*options)->shift_pen = INT2_MAX;
799 (*options)->is_ooframe = FALSE;
800 (*options)->gap_open = BLAST_GAP_OPEN_PROT;
801 (*options)->gap_extend = BLAST_GAP_EXTN_PROT;
802 (*options)->matrix = strdup(BLAST_DEFAULT_MATRIX);
803 } else { /* nucleotide-nucleotide options. */
804 (*options)->penalty = BLAST_PENALTY;
805 (*options)->reward = BLAST_REWARD;
806 /* This is correct except when greedy extension is used. In that case
807 these values would have to be reset. */
808 (*options)->gap_open = BLAST_GAP_OPEN_NUCL;
809 (*options)->gap_extend = BLAST_GAP_EXTN_NUCL;
810 }
811 if (program_number != eBlastTypeTblastx) {
812 (*options)->gapped_calculation = TRUE;
813 }
814 (*options)->program_number = program_number;
815 /* By default cross_match-like complexity adjusted scoring is
816 turned off. RMBlastN is currently the only program to use this. -RMH */
817 (*options)->complexity_adjusted_scoring = FALSE;
818
819 return 0;
820 }
821
822 Int2
823 BLAST_FillScoringOptions(BlastScoringOptions* options,
824 EBlastProgramType program_number, Boolean greedy_extension, Int4 penalty, Int4 reward,
825 const char *matrix, Int4 gap_open, Int4 gap_extend)
826 {
827 if (!options)
828 return BLASTERR_INVALIDPARAM;
829
830 if (/*program_number != eBlastTypeBlastn &&
831 program_number != eBlastTypePhiBlastn*/
832 !Blast_ProgramIsNucleotide(program_number)) {/* protein-protein options. */
833 /* If matrix name is not provided, keep the default "BLOSUM62" value filled in
834 BlastScoringOptionsNew, otherwise reset it. */
835 if (matrix)
836 BlastScoringOptionsSetMatrix(options, matrix);
837 } else { /* nucleotide-nucleotide options. */
838 if (penalty)
839 options->penalty = penalty;
840 if (reward)
841 options->reward = reward;
842
843 if (greedy_extension) {
844 options->gap_open = BLAST_GAP_OPEN_MEGABLAST;
845 options->gap_extend = BLAST_GAP_EXTN_MEGABLAST;
846 } else {
847 options->gap_open = BLAST_GAP_OPEN_NUCL;
848 options->gap_extend = BLAST_GAP_EXTN_NUCL;
849 }
850 }
851 if (gap_open >= 0)
852 options->gap_open = gap_open;
853 if (gap_extend >= 0)
854 options->gap_extend = gap_extend;
855
856 options->program_number = program_number;
857
858 return 0;
859 }
860
861 Int2
862 BlastScoringOptionsValidate(EBlastProgramType program_number,
863 const BlastScoringOptions* options, Blast_Message* *blast_msg)
864
865 {
866 if (options == NULL)
867 return BLASTERR_INVALIDPARAM;
868
869 if (program_number == eBlastTypeTblastx && options->gapped_calculation)
870 {
871 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
872 "Gapped search is not allowed for tblastx");
873 return BLASTERR_OPTION_PROGRAM_INVALID;
874 }
875
876 if (/*program_number == eBlastTypeBlastn || program_number == eBlastTypePhiBlastn*/
877 Blast_ProgramIsNucleotide(program_number))
878 {
879 // A penalty/reward of 0/0 is a signal that this is rmblastn
880 // which allows specification of penalties as positive integers.
881 if ( ! ( options->penalty == 0 && options->reward == 0 ) )
882 {
883 if (options->penalty >= 0)
884 {
885 Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
886 "BLASTN penalty must be negative");
887 return BLASTERR_OPTION_VALUE_INVALID;
888 }
889
890 /* !!! this is temporary until there is jumper or mapping options handle */
891 if (0 && options->gapped_calculation &&
892 !Blast_ProgramIsMapping(program_number) &&
893 !BLAST_CheckRewardPenaltyScores(options->reward, options->penalty))
894 {
895 Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
896 "BLASTN reward/penalty combination not supported for gapped search");
897 return BLASTERR_OPTION_VALUE_INVALID;
898 }
899 }
900
901 if (options->gapped_calculation && options->gap_open > 0 && options->gap_extend == 0)
902 {
903 Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
904 "BLASTN gap extension penalty cannot be 0");
905 return BLASTERR_OPTION_VALUE_INVALID;
906 }
907 }
908 else
909 {
910 if (options->gapped_calculation && !Blast_ProgramIsRpsBlast(program_number))
911 {
912 Int2 status=0;
913 Boolean std_matrix_only =
914 (program_number != eBlastTypeBlastp &&
915 program_number != eBlastTypeTblastn);
916 if ((status=Blast_KarlinBlkGappedLoadFromTables(NULL, options->gap_open,
917 options->gap_extend, options->matrix, std_matrix_only)) != 0)
918 {
919 if (status == 1)
920 {
921 char* buffer;
922
923 buffer = BLAST_PrintMatrixMessage(options->matrix,
924 std_matrix_only);
925
926 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext, buffer);
927 sfree(buffer);
928 return BLASTERR_OPTION_VALUE_INVALID;
929
930 }
931 else if (status == 2)
932 {
933 char* buffer;
934
935 buffer = BLAST_PrintAllowedValues(options->matrix,
936 options->gap_open, options->gap_extend);
937 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext, buffer);
938 sfree(buffer);
939 return BLASTERR_OPTION_VALUE_INVALID;
940 }
941 }
942 }
943 }
944
945 if (program_number != eBlastTypeBlastx && program_number != eBlastTypeTblastn && options->is_ooframe)
946 {
947 Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
948 "Out-of-frame only permitted for blastx and tblastn");
949 return BLASTERR_OPTION_PROGRAM_INVALID;
950 }
951
952 return 0;
953 }
954
955 Int2
956 BlastScoringOptionsDup(BlastScoringOptions* *new_opt, const BlastScoringOptions* old_opt)
957 {
958 if (old_opt == NULL || new_opt == NULL)
959 return BLASTERR_INVALIDPARAM;
960
961 *new_opt = (BlastScoringOptions*) BlastMemDup(old_opt, sizeof(BlastScoringOptions));
962 if (*new_opt == NULL)
963 return BLASTERR_MEMORY;
964
965 if (old_opt->matrix)
966 (*new_opt)->matrix = strdup(old_opt->matrix);
967
968 if (old_opt->matrix_path)
969 (*new_opt)->matrix_path = strdup(old_opt->matrix_path);
970
971 return 0;
972 }
973
974 Int2 BlastScoringOptionsSetMatrix(BlastScoringOptions* opts,
975 const char* matrix_name)
976 {
977 Uint4 i;
978
979 if (matrix_name) {
980 sfree(opts->matrix);
981 opts->matrix = strdup(matrix_name);
982 /* Make it all upper case */
983 for (i=0; imatrix); ++i)
984 opts->matrix[i] = toupper((unsigned char) opts->matrix[i]);
985 }
986 return 0;
987 }
988
989 BlastEffectiveLengthsOptions*
990 BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions* options)
991
992 {
993 if (options == NULL)
994 return NULL;
995
996 sfree(options->searchsp_eff);
997 sfree(options);
998 return NULL;
999 }
1000
1001
1002 Int2
1003 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions* *options)
1004
1005 {
1006 if (options == NULL) {
1007 return BLASTERR_INVALIDPARAM;
1008 }
1009
1010 *options = (BlastEffectiveLengthsOptions*)
1011 calloc(1, sizeof(BlastEffectiveLengthsOptions));
1012
1013 if (*options == NULL)
1014 return BLASTERR_MEMORY;
1015
1016 return 0;
1017 }
1018
1019 Boolean
1020 BlastEffectiveLengthsOptions_IsSearchSpaceSet(const
1021 BlastEffectiveLengthsOptions*
1022 options)
1023 {
1024 int i;
1025 if ( !options || options->searchsp_eff == NULL) {
1026 return FALSE;
1027 }
1028
1029 for (i = 0; i < options->num_searchspaces; i++) {
1030 if (options->searchsp_eff[i] != 0) {
1031 return TRUE;
1032 }
1033 }
1034 return FALSE;
1035 }
1036
1037 Int2
1038 BLAST_FillEffectiveLengthsOptions(BlastEffectiveLengthsOptions* options,
1039 Int4 dbseq_num, Int8 db_length, Int8* searchsp_eff, Int4 num_searchsp)
1040 {
1041 Int4 index;
1042 if (!options)
1043 return BLASTERR_INVALIDPARAM;
1044
1045 if (num_searchsp > options->num_searchspaces) {
1046 options->num_searchspaces = num_searchsp;
1047 options->searchsp_eff = (Int8 *)realloc(options->searchsp_eff,
1048 num_searchsp * sizeof(Int8));
1049 if (options->searchsp_eff == NULL)
1050 return BLASTERR_MEMORY;
1051 }
1052
1053 for (index = 0; index < options->num_searchspaces; index++)
1054 options->searchsp_eff[index] = searchsp_eff[index];
1055
1056 options->dbseq_num = dbseq_num;
1057 options->db_length = db_length;
1058
1059 return 0;
1060 }
1061
1062 LookupTableOptions*
1063 LookupTableOptionsFree(LookupTableOptions* options)
1064
1065 {
1066
1067 if (options == NULL)
1068 return NULL;
1069
1070 sfree(options->phi_pattern);
1071
1072 sfree(options);
1073 return NULL;
1074 }
1075
1076 Int2
1077 LookupTableOptionsNew(EBlastProgramType program_number, LookupTableOptions* *options)
1078 {
1079 *options = (LookupTableOptions*) calloc(1, sizeof(LookupTableOptions));
1080
1081 if (*options == NULL)
1082 return BLASTERR_INVALIDPARAM;
1083
1084 switch (program_number) {
1085 case eBlastTypeMapping:
1086 (*options)->max_db_word_count = MAX_DB_WORD_COUNT_MAPPER;
1087 case eBlastTypeBlastn:
1088 /* Blastn default is megablast. */
1089 (*options)->word_size = BLAST_WORDSIZE_MEGABLAST;
1090 (*options)->lut_type = eMBLookupTable;
1091 break;
1092 case eBlastTypeRpsBlast: case eBlastTypeRpsTblastn:
1093 (*options)->word_size = BLAST_WORDSIZE_PROT;
1094 (*options)->lut_type = eRPSLookupTable;
1095
1096 if (program_number == eBlastTypeRpsBlast)
1097 (*options)->threshold = BLAST_WORD_THRESHOLD_BLASTP;
1098 else
1099 (*options)->threshold = BLAST_WORD_THRESHOLD_TBLASTN;
1100 break;
1101 case eBlastTypePhiBlastn:
1102 (*options)->lut_type = ePhiNaLookupTable;
1103 break;
1104 case eBlastTypePhiBlastp:
1105 (*options)->lut_type = ePhiLookupTable;
1106 break;
1107 default:
1108 (*options)->word_size = BLAST_WORDSIZE_PROT;
1109 (*options)->lut_type = eAaLookupTable;
1110
1111 if (program_number == eBlastTypeBlastp)
1112 (*options)->threshold = BLAST_WORD_THRESHOLD_BLASTP;
1113 else if (program_number == eBlastTypeBlastx)
1114 (*options)->threshold = BLAST_WORD_THRESHOLD_BLASTX;
1115 else if (program_number == eBlastTypeTblastn)
1116 (*options)->threshold = BLAST_WORD_THRESHOLD_TBLASTN;
1117 else if (program_number == eBlastTypeTblastx)
1118 (*options)->threshold = BLAST_WORD_THRESHOLD_TBLASTX;
1119 break;
1120 }
1121
1122 (*options)->program_number = program_number;
1123 (*options)->stride = 0;
1124
1125 return 0;
1126 }
1127
1128 Int2
1129 BLAST_FillLookupTableOptions(LookupTableOptions* options,
1130 EBlastProgramType program_number, Boolean is_megablast,
1131 double threshold, Int4 word_size)
1132 {
1133 if (!options)
1134 return BLASTERR_INVALIDPARAM;
1135
1136 if (program_number == eBlastTypeBlastn) {
1137
1138 if (is_megablast) {
1139 options->lut_type = eMBLookupTable;
1140 options->word_size = BLAST_WORDSIZE_MEGABLAST;
1141 } else {
1142 options->lut_type = eNaLookupTable;
1143 options->word_size = BLAST_WORDSIZE_NUCL;
1144 }
1145 } else if (program_number == eBlastTypeMapping) {
1146 options->lut_type = eNaHashLookupTable;
1147 options->word_size = BLAST_WORDSIZE_MAPPER;
1148 options->max_db_word_count = MAX_DB_WORD_COUNT_MAPPER;
1149 } else {
1150 options->lut_type = eAaLookupTable;
1151 }
1152
1153 /* if the supplied threshold is negative, disable neighboring words */
1154 if (threshold < 0)
1155 options->threshold = 0;
1156
1157 /* if the supplied threshold is > 0, use it otherwise, use the default */
1158 if (threshold > 0)
1159 options->threshold = threshold;
1160
1161 if (Blast_ProgramIsRpsBlast(program_number))
1162 options->lut_type = eRPSLookupTable;
1163 if (word_size)
1164 options->word_size = word_size;
1165 if ((program_number == eBlastTypeTblastn ||
1166 program_number == eBlastTypeBlastp ||
1167 program_number == eBlastTypeBlastx) &&
1168 word_size > 5)
1169 options->lut_type = eCompressedAaLookupTable;
1170
1171 return 0;
1172 }
1173
1174 Int2 BLAST_GetSuggestedThreshold(EBlastProgramType program_number, const char* matrixName, double* threshold)
1175 {
1176
1177 const double kB62_threshold = 11;
1178
1179 if (program_number == eBlastTypeBlastn ||
1180 program_number == eBlastTypeMapping)
1181 return 0;
1182
1183 if (matrixName == NULL)
1184 return BLASTERR_INVALIDPARAM;
1185
1186 if(strcasecmp(matrixName, "BLOSUM62") == 0)
1187 *threshold = kB62_threshold;
1188 else if(strcasecmp(matrixName, "BLOSUM45") == 0)
1189 *threshold = 14;
1190 else if(strcasecmp(matrixName, "BLOSUM62_20") == 0)
1191 *threshold = 100;
1192 else if(strcasecmp(matrixName, "BLOSUM80") == 0)
1193 *threshold = 12;
1194 else if(strcasecmp(matrixName, "PAM30") == 0)
1195 *threshold = 16;
1196 else if(strcasecmp(matrixName, "PAM70") == 0)
1197 *threshold = 14;
1198 else if(strcasecmp(matrixName, "IDENTITY") == 0)
1199 *threshold = 27;
1200 else
1201 *threshold = kB62_threshold;
1202
1203 if (Blast_SubjectIsTranslated(program_number) == TRUE)
1204 *threshold += 2; /* Covers tblastn, tblastx, psi-tblastn rpstblastn. */
1205 else if (Blast_QueryIsTranslated(program_number) == TRUE)
1206 *threshold += 1;
1207
1208 return 0;
1209 }
1210
1211 Int2 BLAST_GetSuggestedWindowSize(EBlastProgramType program_number, const char* matrixName, Int4* window_size)
1212 {
1213 const Int4 kB62_windowsize = 40;
1214
1215 if (program_number == eBlastTypeBlastn ||
1216 program_number == eBlastTypeMapping)
1217 return 0;
1218
1219 if (matrixName == NULL)
1220 return BLASTERR_INVALIDPARAM;
1221
1222 if(strcasecmp(matrixName, "BLOSUM62") == 0)
1223 *window_size = kB62_windowsize;
1224 else if(strcasecmp(matrixName, "BLOSUM45") == 0)
1225 *window_size = 60;
1226 else if(strcasecmp(matrixName, "BLOSUM80") == 0)
1227 *window_size = 25;
1228 else if(strcasecmp(matrixName, "PAM30") == 0)
1229 *window_size = 15;
1230 else if(strcasecmp(matrixName, "PAM70") == 0)
1231 *window_size = 20;
1232 else
1233 *window_size = kB62_windowsize;
1234
1235 return 0;
1236 }
1237
1238 /** Validate options for the discontiguous word megablast
1239 * Word size must be 11 or 12; template length 16, 18 or 21;
1240 * template type 0, 1 or 2.
1241 * @param word_size Word size option [in]
1242 * @param template_length Discontiguous template length [in]
1243 * @param template_type Discontiguous template type [in]
1244 * @param blast_msg Used for storing error messages [in][out]
1245 * @return TRUE if options combination valid.
1246 */
1247 static Boolean
1248 s_DiscWordOptionsValidate(Int4 word_size, Uint1 template_length,
1249 Uint1 template_type,
1250 Blast_Message** blast_msg)
1251 {
1252 if (template_length == 0)
1253 return TRUE;
1254
1255
1256 if (word_size != 11 && word_size != 12) {
1257 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1258 "Invalid discontiguous template parameters: word "
1259 "size must be either 11 or 12");
1260 return FALSE;
1261 }
1262
1263 if (template_length != 16 && template_length != 18 &&
1264 template_length != 21) {
1265 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1266 "Invalid discontiguous template parameters: "
1267 "template length must be 16, 18, or 21");
1268 return FALSE;
1269 }
1270
1271 if (template_type > 2) {
1272 /* should never fail coming from the C++ APIs as we represent these as
1273 * strings */
1274 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1275 "Invalid discontiguous template parameters: "
1276 "template type must be 0, 1, or 2");
1277 return FALSE;
1278 }
1279
1280 return TRUE;
1281 }
1282
1283 Int2
1284 LookupTableOptionsValidate(EBlastProgramType program_number,
1285 const LookupTableOptions* options, Blast_Message* *blast_msg)
1286
1287 {
1288 const Boolean kPhiBlast = Blast_ProgramIsPhiBlast(program_number);
1289
1290 if (options == NULL)
1291 return BLASTERR_INVALIDPARAM;
1292
1293 if (options->phi_pattern && !kPhiBlast) {
1294 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1295 "PHI pattern can be specified only for blastp and blastn");
1296 return BLASTERR_OPTION_PROGRAM_INVALID;
1297 }
1298
1299 /* For PHI BLAST, the subsequent word size tests are not needed. */
1300 if (kPhiBlast)
1301 return 0;
1302
1303 if (program_number != eBlastTypeBlastn &&
1304 program_number != eBlastTypeMapping &&
1305 (!Blast_ProgramIsRpsBlast(program_number)) &&
1306 options->threshold word_size word_size < 4)
1323 {
1324 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1325 "Word-size must be 4 or greater for nucleotide comparison");
1326 return BLASTERR_OPTION_VALUE_INVALID;
1327 } else if (program_number != eBlastTypeBlastn &&
1328 program_number != eBlastTypeMapping && options->word_size > 5)
1329 {
1330 if (program_number == eBlastTypeBlastp ||
1331 program_number == eBlastTypeTblastn ||
1332 program_number == eBlastTypeBlastx)
1333 {
1334 if (options->word_size > 7) {
1335 Blast_MessageWrite(blast_msg, eBlastSevError,
1336 kBlastMessageNoContext,
1337 "Word-size must be less than "
1338 "8 for a tblastn, blastp or blastx search");
1339 return BLASTERR_OPTION_VALUE_INVALID;
1340 }
1341 }
1342 else {
1343 Blast_MessageWrite(blast_msg, eBlastSevError,
1344 kBlastMessageNoContext,
1345 "Word-size must be less "
1346 "than 6 for protein comparison");
1347 return BLASTERR_OPTION_VALUE_INVALID;
1348 }
1349 }
1350
1351 if (program_number != eBlastTypeBlastn &&
1352 program_number != eBlastTypeMapping &&
1353 options->lut_type == eMBLookupTable)
1354 {
1355 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1356 "Megablast lookup table only supported with blastn");
1357 return BLASTERR_OPTION_PROGRAM_INVALID;
1358 }
1359
1360 if (program_number == eBlastTypeBlastp ||
1361 program_number == eBlastTypeTblastn ||
1362 program_number == eBlastTypeBlastx)
1363 {
1364 if (options->word_size > 5 &&
1365 options->lut_type != eCompressedAaLookupTable) {
1366 Blast_MessageWrite(blast_msg, eBlastSevError,
1367 kBlastMessageNoContext,
1368 "Blastp, Blastx or Tblastn with word size"
1369 " > 5 requires a "
1370 "compressed alphabet lookup table");
1371 return BLASTERR_OPTION_VALUE_INVALID;
1372 }
1373 else if (options->lut_type == eCompressedAaLookupTable &&
1374 options->word_size != 5 && options->word_size != 6 &&
1375 options->word_size != 7) {
1376 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1377 "Compressed alphabet lookup table requires "
1378 "word size 5, 6 or 7");
1379 return BLASTERR_OPTION_VALUE_INVALID;
1380 }
1381 }
1382
1383 if (/*program_number == eBlastTypeBlastn &&*/
1384 Blast_ProgramIsNucleotide(program_number) &&
1385 !Blast_QueryIsPattern(program_number) &&
1386 options->mb_template_length > 0) {
1387 if (!s_DiscWordOptionsValidate(options->word_size,
1388 options->mb_template_length,
1389 options->mb_template_type,
1390 blast_msg)) {
1391 return BLASTERR_OPTION_VALUE_INVALID;
1392 } else if (options->lut_type != eMBLookupTable) {
1393 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1394 "Invalid lookup table type for discontiguous Mega BLAST");
1395 return BLASTERR_OPTION_VALUE_INVALID;
1396 }
1397 }
1398
1399 if (!Blast_ProgramIsNucleotide(program_number) && options->db_filter) {
1400 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1401 "The limit_lookup option can only be used for "
1402 "nucleotide searches");
1403 return BLASTERR_OPTION_VALUE_INVALID;
1404 }
1405
1406 if (options->db_filter && options->word_size < 16) {
1407 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1408 "The limit_lookup option can only be used with "
1409 "word size >= 16");
1410 return BLASTERR_OPTION_VALUE_INVALID;
1411 }
1412
1413 return 0;
1414 }
1415
1416 BlastHitSavingOptions*
1417 BlastHitSavingOptionsFree(BlastHitSavingOptions* options)
1418
1419 {
1420 if (options) {
1421 options->hsp_filt_opt = BlastHSPFilteringOptionsFree(options->hsp_filt_opt);
1422 }
1423 sfree(options);
1424 return NULL;
1425 }
1426
1427
1428 Int2 BlastHitSavingOptionsNew(EBlastProgramType program_number,
1429 BlastHitSavingOptions** options,
1430 Boolean gapped_calculation)
1431 {
1432 *options = (BlastHitSavingOptions*) calloc(1, sizeof(BlastHitSavingOptions));
1433
1434 if (*options == NULL)
1435 return BLASTERR_INVALIDPARAM;
1436
1437 (*options)->hitlist_size = BLAST_HITLIST_SIZE;
1438 (*options)->expect_value = BLAST_EXPECT_VALUE;
1439 (*options)->program_number = program_number;
1440
1441 // Initialize mask_level parameter -RMH-
1442 (*options)->mask_level = 101;
1443
1444 /* By default, sum statistics is used for all translated searches
1445 * (except RPS BLAST), and for all ungapped searches.
1446 */
1447 if (program_number == eBlastTypeRpsTblastn) {
1448 (*options)->do_sum_stats = FALSE;
1449 } else if (!gapped_calculation ||
1450 Blast_QueryIsTranslated(program_number) ||
1451 Blast_SubjectIsTranslated(program_number)) {
1452 (*options)->do_sum_stats = TRUE;
1453 } else {
1454 (*options)->do_sum_stats = FALSE;
1455 }
1456
1457 (*options)->hsp_filt_opt = NULL;
1458
1459 (*options)->max_edit_distance = INT4_MAX;
1460
1461 return 0;
1462
1463 }
1464
1465 Int2
1466 BLAST_FillHitSavingOptions(BlastHitSavingOptions* options,
1467 double evalue, Int4 hitlist_size,
1468 Boolean is_gapped, Int4 culling_limit,
1469 Int4 min_diag_separation)
1470 {
1471 if (!options)
1472 return BLASTERR_INVALIDPARAM;
1473
1474 if (hitlist_size)
1475 options->hitlist_size = hitlist_size;
1476 if (evalue)
1477 options->expect_value = evalue;
1478 if (min_diag_separation)
1479 options->min_diag_separation = min_diag_separation;
1480 options->culling_limit = culling_limit;
1481 options->hsp_filt_opt = NULL;
1482 options->max_edit_distance = INT4_MAX;
1483
1484 return 0;
1485
1486 }
1487
1488 Int2
1489 BlastHitSavingOptionsValidate(EBlastProgramType program_number,
1490 const BlastHitSavingOptions* options, Blast_Message* *blast_msg)
1491 {
1492 if (options == NULL)
1493 return BLASTERR_INVALIDPARAM;
1494
1495 if (options->hitlist_size < 1)
1496 {
1497 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1498 "No hits are being saved");
1499 return BLASTERR_OPTION_VALUE_INVALID;
1500 }
1501
1502 if (options->expect_value cutoff_score longest_intron != 0 &&
1510 program_number != eBlastTypeTblastn &&
1511 program_number != eBlastTypePsiTblastn &&
1512 program_number != eBlastTypeBlastx &&
1513 program_number != eBlastTypeMapping) {
1514 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1515 "Uneven gap linking of HSPs is allowed for blastx, "
1516 "tblastn, and psitblastn only");
1517 return BLASTERR_OPTION_PROGRAM_INVALID;
1518 }
1519
1520 if (options->culling_limit < 0)
1521 {
1522 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1523 "culling limit must be greater than or equal to zero");
1524 return BLASTERR_OPTION_VALUE_INVALID;
1525 }
1526
1527 if (options->hsp_filt_opt) {
1528 if (BlastHSPFilteringOptionsValidate(options->hsp_filt_opt) != 0) {
1529 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1530 "HSP Filtering options invalid");
1531 return BLASTERR_OPTION_VALUE_INVALID;
1532 }
1533 }
1534
1535 return 0;
1536 }
1537
1538 Int2 PSIBlastOptionsNew(PSIBlastOptions** psi_options)
1539 {
1540 PSIBlastOptions* options = NULL;
1541
1542 if ( !psi_options )
1543 return BLASTERR_INVALIDPARAM;
1544
1545 options = (PSIBlastOptions*)calloc(1, sizeof(PSIBlastOptions));
1546 if ( !options )
1547 return BLASTERR_MEMORY;
1548
1549 *psi_options = options;
1550 options->inclusion_ethresh = PSI_INCLUSION_ETHRESH;
1551 options->pseudo_count = PSI_PSEUDO_COUNT_CONST;
1552 options->use_best_alignment = TRUE;
1553
1554 options->nsg_compatibility_mode = FALSE;
1555 options->impala_scaling_factor = kPSSM_NoImpalaScaling;
1556 options->ignore_unaligned_positions = FALSE;
1557
1558 return 0;
1559 }
1560
1561 Int2 PSIBlastOptionsValidate(const PSIBlastOptions* psi_options,
1562 Blast_Message** blast_msg)
1563 {
1564 Int2 retval = 1; /* assume failure */
1565
1566 if ( !psi_options ) {
1567 return retval;
1568 }
1569
1570 if (psi_options->pseudo_count < 0) {
1571 Blast_MessageWrite(blast_msg, eBlastSevError, kBlastMessageNoContext,
1572 "Pseudo count must be greater than or equal to 0");
1573 return retval;
1574 }
1575
1576 if (psi_options->inclusion_ethresh genetic_code = BLAST_GENETIC_CODE;
1606 *db_options = options;
1607
1608 return 0;
1609 }
1610
1611 BlastDatabaseOptions*
1612 BlastDatabaseOptionsFree(BlastDatabaseOptions* db_options)
1613 {
1614
1615 if (db_options == NULL)
1616 return NULL;
1617
1618 sfree(db_options);
1619 return NULL;
1620 }
1621
1622 Int2 BLAST_InitDefaultOptions(EBlastProgramType program_number,
1623 LookupTableOptions** lookup_options,
1624 QuerySetUpOptions** query_setup_options,
1625 BlastInitialWordOptions** word_options,
1626 BlastExtensionOptions** ext_options,
1627 BlastHitSavingOptions** hit_options,
1628 BlastScoringOptions** score_options,
1629 BlastEffectiveLengthsOptions** eff_len_options,
1630 PSIBlastOptions** psi_options,
1631 BlastDatabaseOptions** db_options)
1632 {
1633 Int2 status;
1634
1635 if ((status = LookupTableOptionsNew(program_number, lookup_options)))
1636 return status;
1637
1638 if ((status=BlastQuerySetUpOptionsNew(query_setup_options)))
1639 return status;
1640
1641 if ((status=BlastInitialWordOptionsNew(program_number, word_options)))
1642 return status;
1643
1644 if ((status=BlastScoringOptionsNew(program_number, score_options)))
1645 return status;
1646
1647 if ((status = BlastExtensionOptionsNew(program_number, ext_options,
1648 (*score_options)->gapped_calculation)))
1649 return status;
1650
1651 if ((status=BlastHitSavingOptionsNew(program_number, hit_options,
1652 (*score_options)->gapped_calculation)))
1653 return status;
1654
1655 if ((status=BlastEffectiveLengthsOptionsNew(eff_len_options)))
1656 return status;
1657
1658 if ((status=PSIBlastOptionsNew(psi_options)))
1659 return status;
1660
1661 if ((status=BlastDatabaseOptionsNew(db_options)))
1662 return status;
1663
1664 return 0;
1665
1666 }
1667
1668 /** Checks that the extension and scoring options are consistent with each other
1669 * @param program_number identifies the program [in]
1670 * @param ext_options the extension options [in]
1671 * @param score_options the scoring options [in]
1672 * @param blast_msg returns a message on errors. [in|out]
1673 * @return zero on success, an error code otherwise.
1674 */
1675 static Int2 s_BlastExtensionScoringOptionsValidate(EBlastProgramType program_number,
1676 const BlastExtensionOptions* ext_options,
1677 const BlastScoringOptions* score_options,
1678 Blast_Message* *blast_msg)
1679 {
1680 if (ext_options == NULL || score_options == NULL)
1681 return BLASTERR_INVALIDPARAM;
1682
1683 if (program_number == eBlastTypeBlastn)
1684 {
1685 if (score_options->gap_open == 0 && score_options->gap_extend == 0)
1686 {
1687 if (ext_options->ePrelimGapExt != eGreedyScoreOnly &&
1688 ext_options->eTbackExt != eGreedyTbck)
1689 {
1690 Blast_MessageWrite(blast_msg, eBlastSevWarning,
1691 kBlastMessageNoContext,
1692 "Greedy extension must be used if gap existence and extension options are zero");
1693 return BLASTERR_OPTION_VALUE_INVALID;
1694 }
1695 }
1696 }
1697
1698 if (program_number == eBlastTypeMapping) {
1699 if (ext_options->ePrelimGapExt != eJumperWithTraceback) {
1700
1701 Blast_MessageWrite(blast_msg, eBlastSevWarning,
1702 kBlastMessageNoContext,
1703 "Jumper extension must be used for mapping");
1704
1705 return BLASTERR_OPTION_VALUE_INVALID;
1706 }
1707 }
1708
1709 if (ext_options->compositionBasedStats != eNoCompositionBasedStats)
1710 {
1711 if (!Blast_QueryIsPssm(program_number) && program_number != eBlastTypeTblastn &&
1712 program_number != eBlastTypeBlastp &&
1713 program_number != eBlastTypeBlastx &&
1714 program_number != eBlastTypeRpsBlast &&
1715 program_number != eBlastTypeRpsTblastn &&
1716 program_number != eBlastTypePsiBlast) {
1717 Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
1718 "Compositional adjustments are only supported with blastp, blastx, or tblastn");
1719 return BLASTERR_OPTION_VALUE_INVALID;
1720 }
1721 if (!score_options->gapped_calculation) {
1722 Blast_MessageWrite(blast_msg, eBlastSevWarning, kBlastMessageNoContext,
1723 "Compositional adjustments are only supported for gapped searches");
1724 return BLASTERR_OPTION_VALUE_INVALID;
1725 }
1726
1727 }
1728
1729 return 0;
1730 }
1731
1732
1733 Int2 BLAST_ValidateOptions(EBlastProgramType program_number,
1734 const BlastExtensionOptions* ext_options,
1735 const BlastScoringOptions* score_options,
1736 const LookupTableOptions* lookup_options,
1737 const BlastInitialWordOptions* word_options,
1738 const BlastHitSavingOptions* hit_options,
1739 Blast_Message* *blast_msg)
1740 {
1741 Int2 status = 0;
1742
1743 if ((status = BlastExtensionOptionsValidate(program_number, ext_options,
1744 blast_msg)) != 0)
1745 return status;
1746 if ((status = BlastScoringOptionsValidate(program_number, score_options,
1747 blast_msg)) != 0)
1748 return status;
1749 if ((status = LookupTableOptionsValidate(program_number,
1750 lookup_options, blast_msg)) != 0)
1751 return status;
1752 if ((status = BlastInitialWordOptionsValidate(program_number,
1753 word_options, blast_msg)) != 0)
1754 return status;
1755 if ((status = BlastHitSavingOptionsValidate(program_number, hit_options,
1756 blast_msg)) != 0)
1757 return status;
1758 if ((status = s_BlastExtensionScoringOptionsValidate(program_number, ext_options,
1759 score_options, blast_msg)) != 0)
1760 return status;
1761
1762 /* Word sizes larger than 5 are not suported for IDENTITY scoring matrix.
1763 Identity matrix is only supported for blastp and tblastn. */
1764 if (program_number == eBlastTypeBlastp ||
1765 program_number == eBlastTypeTblastn) {
1766
1767 char* matrix = BLAST_StrToUpper(score_options->matrix);
1768 Boolean is_identity = strcmp(matrix, "IDENTITY") == 0;
1769
1770 if (matrix) {
1771 free(matrix);
1772 }
1773
1774 if (lookup_options->word_size > 5 && is_identity) {
1775
1776 Blast_MessageWrite(blast_msg, eBlastSevError,
1777 kBlastMessageNoContext,
1778 "Word size larger than 5 is not supported for "
1779 "the identity scoring matrix");
1780
1781 return BLASTERR_OPTION_VALUE_INVALID;
1782 }
1783 }
1784
1785 if (program_number == eBlastTypeRpsBlast ||
1786 program_number == eBlastTypeRpsTblastn) {
1787 if((hit_options->culling_limit != 0) || (hit_options->hsp_filt_opt != NULL)) {
1788 Blast_MessageWrite(blast_msg, eBlastSevError,
1789 kBlastMessageNoContext,
1790 "Culling or best hit filtering is not supported");
1791 return BLASTERR_OPTION_VALUE_INVALID;
1792 }
1793 }
1794
1795 return status;
1796 }
1797
1798 BlastHSPBestHitOptions* BlastHSPBestHitOptionsNew(double overhang, double score_edge)
1799 {
1800 BlastHSPBestHitOptions* retval =
1801 (BlastHSPBestHitOptions*) calloc(1, sizeof(BlastHSPBestHitOptions));
1802 retval->overhang = overhang;
1803 retval->score_edge = score_edge;
1804 return retval;
1805 }
1806
1807 Int2
1808 BlastHSPBestHitOptionsValidate(const BlastHSPFilteringOptions* opts)
1809 {
1810 Int2 retval = 0; /* assume success */
1811 BlastHSPBestHitOptions* best_hit = opts->best_hit;
1812
1813 if ( !best_hit ) {
1814 return retval;
1815 }
1816
1817 if (best_hit->overhang overhang >= kBestHit_OverhangMax) {
1819 return -1;
1820 }
1821
1822 if (best_hit->score_edge score_edge >= kBestHit_ScoreEdgeMax) {
1824 return -1;
1825 }
1826
1827 return retval;
1828 }
1829
1830 BlastHSPBestHitOptions* BlastHSPBestHitOptionsFree(BlastHSPBestHitOptions* opt)
1831 {
1832 if ( !opt ) {
1833 return NULL;
1834 }
1835 sfree(opt);
1836 return NULL;
1837 }
1838
1839 BlastHSPCullingOptions* BlastHSPCullingOptionsNew(int max)
1840 {
1841 BlastHSPCullingOptions* retval =
1842 (BlastHSPCullingOptions*) calloc(1, sizeof(BlastHSPCullingOptions));
1843 retval->max_hits = max;
1844 return retval;
1845 }
1846
1847 Int2
1848 BlastHSPCullingOptionsValidate(const BlastHSPFilteringOptions* opts)
1849 {
1850 Int2 retval = 0;
1851 BlastHSPCullingOptions* culling_opts = opts->culling_opts;
1852 if (!culling_opts)
1853 return retval;
1854
1855 if (culling_opts->max_hits < 0)
1856 return -1;
1857
1858 return retval;
1859 }
1860
1861 BlastHSPCullingOptions*
1862 BlastHSPCullingOptionsFree(BlastHSPCullingOptions* culling_opts)
1863 {
1864 if (!culling_opts)
1865 return NULL;
1866
1867 sfree(culling_opts);
1868 return NULL;
1869 }
1870
1871
1872 BlastHSPFilteringOptions* BlastHSPFilteringOptionsNew()
1873 {
1874 return (BlastHSPFilteringOptions*)calloc(1,
1875 sizeof(BlastHSPFilteringOptions));
1876 }
1877
1878 Int2
1879 BlastHSPFilteringOptions_AddBestHit(BlastHSPFilteringOptions* filt_opts,
1880 BlastHSPBestHitOptions** best_hit,
1881 EBlastStage stage)
1882 {
1883 if ( filt_opts == NULL || best_hit == NULL || *best_hit == NULL) {
1884 return 1;
1885 }
1886
1887 filt_opts->best_hit = *best_hit;
1888 *best_hit = NULL;
1889 filt_opts->best_hit_stage = stage;
1890
1891 return 0;
1892 }
1893
1894 Int2
1895 BlastHSPFilteringOptions_AddCulling(BlastHSPFilteringOptions* filt_opts,
1896 BlastHSPCullingOptions** culling,
1897 EBlastStage stage)
1898 {
1899 if ( filt_opts == NULL || culling == NULL || *culling == NULL) {
1900 return 1;
1901 }
1902
1903 filt_opts->culling_opts = *culling;
1904 *culling = NULL;
1905 filt_opts->culling_stage = stage;
1906
1907 return 0;
1908 }
1909
1910 Int2
1911 BlastHSPFilteringOptionsValidate(const BlastHSPFilteringOptions* opts)
1912 {
1913 Int2 retval = 0; /* assume success */
1914 Boolean writer_found = FALSE;
1915
1916 if ( (retval = BlastHSPBestHitOptionsValidate(opts)) != 0) {
1917 return retval;
1918 }
1919 if (opts->best_hit_stage & ePrelimSearch) {
1920 writer_found = TRUE;
1921 }
1922
1923 if ( (retval = BlastHSPCullingOptionsValidate(opts)) != 0) {
1924 return retval;
1925 }
1926 if ((opts->culling_stage & ePrelimSearch) && writer_found) {
1927 return 1;
1928 }
1929
1930 return retval;
1931 }
1932
1933 BlastHSPFilteringOptions*
1934 BlastHSPFilteringOptionsFree(BlastHSPFilteringOptions* opts)
1935 {
1936 if ( !opts ) {
1937 return NULL;
1938 }
1939 opts->best_hit = BlastHSPBestHitOptionsFree(opts->best_hit);
1940 opts->culling_opts = BlastHSPCullingOptionsFree(opts->culling_opts);
1941 opts->subject_besthit_opts = BlastHSPSubjectBestHitOptionsFree(opts->subject_besthit_opts);
1942 sfree(opts);
1943 return opts;
1944 }
1945
1946 BlastHSPSubjectBestHitOptions*
1947 BlastHSPSubjectBestHitOptionsNew(Boolean isProtein)
1948 {
1949 BlastHSPSubjectBestHitOptions* retval =
1950 (BlastHSPSubjectBestHitOptions*) calloc(1, sizeof(BlastHSPSubjectBestHitOptions));
1951 if(isProtein){
1952 retval->max_range_diff = DEFAULT_SUBJECT_BESTHIT_PROT_MAX_RANGE_DIFF;
1953 }
1954 else {
1955 retval->max_range_diff = DEFAULT_SUBJECT_BESTHIT_NUCL_MAX_RANGE_DIFF;
1956 }
1957 return retval;
1958 }
1959
1960 Int2
1961 BlastHSPSubjectBestHitOptionsValidate(const BlastHSPFilteringOptions* opts)
1962 {
1963 Int2 retval = 0;
1964 BlastHSPSubjectBestHitOptions* besthit_opts = opts->subject_besthit_opts;
1965 if (!besthit_opts)
1966 return retval;
1967
1968 return retval;
1969 }
1970
1971 BlastHSPSubjectBestHitOptions*
1972 BlastHSPSubjectBestHitOptionsFree(BlastHSPSubjectBestHitOptions* subject_besthit_opts)
1973 {
1974 if (!subject_besthit_opts)
1975 return NULL;
1976
1977 sfree(subject_besthit_opts);
1978 return NULL;
1979 }
1980
1981 Int2
1982 BlastHSPFilteringOptions_AddSubjectBestHit(BlastHSPFilteringOptions* filt_opts,
1983 BlastHSPSubjectBestHitOptions** subject_besthit)
1984 {
1985 if ( filt_opts == NULL || subject_besthit == NULL || *subject_besthit == NULL) {
1986 return 1;
1987 }
1988
1989 filt_opts->subject_besthit_opts = *subject_besthit;
1990 *subject_besthit = NULL;
1991
1992 return 0;
1993 }
1994
1995
1996
sfree#define sfree(x)Safe free a pointer: belongs to a higher level header.Definition: blast_def.h:112
kSegLocutconst double kSegLocutLocut parameter for SEG.Definition: blast_seg.c:46
kSegWindowconst int kSegWindowWindow that SEG examines at once.Definition: blast_seg.c:45
kSegHicutconst double kSegHicutHicut parameter for SEG.Definition: blast_seg.c:47
EBlastStageEBlastStageEnumeration for the stages in the BLAST search.Definition: blast_def.h:324
ePrelimSearch@ ePrelimSearchPreliminary stage.Definition: blast_def.h:328
blast_filter.hBLAST filtering functions.
BlastFilteringOptionsFromStringInt2 BlastFilteringOptionsFromString(EBlastProgramType program_number, const char *instructions, SBlastFilterOptions **filtering_options, Blast_Message **blast_message)Produces SBlastFilterOptions from a string that has been traditionally supported in blast.Definition: blast_filter.c:436
eBlastSevError@ eBlastSevErrorDefinition: blast_message.h:58
eBlastSevWarning@ eBlastSevWarningDefinition: blast_message.h:57
BLASTERR_OPTION_PROGRAM_INVALID#define BLASTERR_OPTION_PROGRAM_INVALIDThe option is not supported with the specified program.Definition: blast_message.h:165
BLASTERR_OPTION_VALUE_INVALID#define BLASTERR_OPTION_VALUE_INVALIDThe value of the option is not supported (e.g., word size too small)Definition: blast_message.h:168
Blast_MessageWriteInt2 Blast_MessageWrite(Blast_Message **blast_msg, EBlastSeverity severity, int context, const char *message)Writes a message to a structure.Definition: blast_message.c:102
kBlastMessageNoContextconst int kBlastMessageNoContextDeclared in blast_message.h as extern const.Definition: blast_message.c:36
BLASTERR_MEMORY#define BLASTERR_MEMORYSystem error: out of memory condition.Definition: blast_message.h:143
BLASTERR_INVALIDPARAM#define BLASTERR_INVALIDPARAMInvalid parameter: possible programmer error or pre-condition not met.Definition: blast_message.h:146
PSIBlastOptionsFreePSIBlastOptions * PSIBlastOptionsFree(PSIBlastOptions *psi_options)Deallocate PSI BLAST options.Definition: blast_options.c:1586
BLAST_FillQuerySetUpOptionsInt2 BLAST_FillQuerySetUpOptions(QuerySetUpOptions *options, EBlastProgramType program, const char *filter_string, Uint1 strand_option)Fill non-default contents of the QuerySetUpOptions.Definition: blast_options.c:534
BlastHSPCullingOptionsNewBlastHSPCullingOptions * BlastHSPCullingOptionsNew(int max)Allocate a new object for culling options.Definition: blast_options.c:1839
BlastDatabaseOptionsNewInt2 BlastDatabaseOptionsNew(BlastDatabaseOptions **db_options)Allocates the BlastDatabase options structure and sets the default database genetic code value (BLAST...Definition: blast_options.c:1592
SWindowMaskerOptionsResetDBInt2 SWindowMaskerOptionsResetDB(SWindowMaskerOptions **winmask_options, const char *db)Resets name of db for window masker filtering.Definition: blast_options.c:158
SRepeatFilterOptionsNewInt2 SRepeatFilterOptionsNew(SRepeatFilterOptions **repeat_options)Allocates memory for SRepeatFilterOptions, fills in defaults.Definition: blast_options.c:127
PSIBlastOptionsValidateInt2 PSIBlastOptionsValidate(const PSIBlastOptions *psi_options, Blast_Message **blast_msg)Validates the PSI BLAST options so that they have sane values.Definition: blast_options.c:1561
BlastHSPBestHitOptionsValidateInt2 BlastHSPBestHitOptionsValidate(const BlastHSPFilteringOptions *opts)Validate the best hit algorithm parameters (if any) in the.Definition: blast_options.c:1808
BLAST_ValidateOptionsInt2 BLAST_ValidateOptions(EBlastProgramType program_number, const BlastExtensionOptions *ext_options, const BlastScoringOptions *score_options, const LookupTableOptions *lookup_options, const BlastInitialWordOptions *word_options, const BlastHitSavingOptions *hit_options, Blast_Message **blast_msg)Validate all options.Definition: blast_options.c:1733
BlastHSPFilteringOptions_AddCulling: Int2 BlastHSPFilteringOptions_AddCulling(BlastHSPFilteringOptions *filt_opts, BlastHSPCullingOptions **culling, EBlastStage stage) - Adds culling options to the BlastHSPFilteringOptions structure, taking over *culling (which is set to NULL on success) and recording the stage. Definition: blast_options.c:1895
BlastHitSavingOptionsFreeBlastHitSavingOptions * BlastHitSavingOptionsFree(BlastHitSavingOptions *options)Deallocate memory for BlastHitSavingOptions.Definition: blast_options.c:1417
BlastScoringOptionsValidateInt2 BlastScoringOptionsValidate(EBlastProgramType program_number, const BlastScoringOptions *options, Blast_Message **blast_msg)Validate contents of BlastScoringOptions.Definition: blast_options.c:862
BlastQuerySetUpOptionsNewInt2 BlastQuerySetUpOptionsNew(QuerySetUpOptions **options)Allocate memory for QuerySetUpOptions and fill with default values.Definition: blast_options.c:514
BLAST_GetSuggestedThresholdInt2 BLAST_GetSuggestedThreshold(EBlastProgramType program_number, const char *matrixName, double *threshold)Get thresholds for word-finding suggested by Stephen Altschul.Definition: blast_options.c:1174
SDustOptionsFreeSDustOptions * SDustOptionsFree(SDustOptions *dust_options)Frees SDustOptions.Definition: blast_options.c:50
BLAST_FillScoringOptionsInt2 BLAST_FillScoringOptions(BlastScoringOptions *options, EBlastProgramType program_number, Boolean greedy_extension, Int4 penalty, Int4 reward, const char *matrix, Int4 gap_open, Int4 gap_extend)Fill non-default values in the BlastScoringOptions structure.Definition: blast_options.c:823
BlastHSPFilteringOptionsFreeBlastHSPFilteringOptions * BlastHSPFilteringOptionsFree(BlastHSPFilteringOptions *opts)Deallocate a BlastHSPFilteringOptions structure.Definition: blast_options.c:1934
SRepeatFilterOptionsResetDBInt2 SRepeatFilterOptionsResetDB(SRepeatFilterOptions **repeat_options, const char *db)Resets name of db for repeat filtering.Definition: blast_options.c:142
BlastScoringOptionsNewInt2 BlastScoringOptionsNew(EBlastProgramType program_number, BlastScoringOptions **options)Allocate memory for BlastScoringOptions and fill with default values.Definition: blast_options.c:788
BlastEffectiveLengthsOptionsNewInt2 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions **options)Allocate memory for BlastEffectiveLengthsOptions* and fill with default values.Definition: blast_options.c:1003
SReadQualityOptionsFreeSReadQualityOptions * SReadQualityOptionsFree(SReadQualityOptions *read_quality_options)Frees memory for SReadQualityOptions.Definition: blast_options.c:177
SBlastFilterOptionsMaskAtHashBoolean SBlastFilterOptionsMaskAtHash(const SBlastFilterOptions *filter_options)Queries whether masking should be done only for the lookup table or for the entire search.Definition: blast_options.c:433
SBlastFilterOptionsNoFilteringBoolean SBlastFilterOptionsNoFiltering(const SBlastFilterOptions *filter_options)Queries whether no masking is required.Definition: blast_options.c:422
BLAST_GetSuggestedWindowSizeInt2 BLAST_GetSuggestedWindowSize(EBlastProgramType program_number, const char *matrixName, Int4 *window_size)Get window sizes for two hit algorithm suggested by Stephen Altschul.Definition: blast_options.c:1211
SBlastFilterOptionsValidateInt2 SBlastFilterOptionsValidate(EBlastProgramType program_number, const SBlastFilterOptions *filter_options, Blast_Message **blast_message)Validates filter options to ensure that program and options are consistent and that options have vali...Definition: blast_options.c:441
kDustLinkerconst int kDustLinkerParameter used by dust to link together close low-complexity segments.Definition: blast_options.c:48
BlastHSPSubjectBestHitOptionsFreeBlastHSPSubjectBestHitOptions * BlastHSPSubjectBestHitOptionsFree(BlastHSPSubjectBestHitOptions *subject_besthit_opts)Deallocates subject besthit structure.Definition: blast_options.c:1972
BLAST_InitDefaultOptionsInt2 BLAST_InitDefaultOptions(EBlastProgramType program_number, LookupTableOptions **lookup_options, QuerySetUpOptions **query_setup_options, BlastInitialWordOptions **word_options, BlastExtensionOptions **ext_options, BlastHitSavingOptions **hit_options, BlastScoringOptions **score_options, BlastEffectiveLengthsOptions **eff_len_options, PSIBlastOptions **psi_options, BlastDatabaseOptions **db_options)Initialize all the BLAST search options structures with the default values.Definition: blast_options.c:1622
SRepeatFilterOptionsFreeSRepeatFilterOptions * SRepeatFilterOptionsFree(SRepeatFilterOptions *repeat_options)Frees SRepeatFilterOptions.Definition: blast_options.c:117
BlastInitialWordOptionsFreeBlastInitialWordOptions * BlastInitialWordOptionsFree(BlastInitialWordOptions *options)Deallocate memory for BlastInitialWordOptions.Definition: blast_options.c:562
BlastHitSavingOptionsNewInt2 BlastHitSavingOptionsNew(EBlastProgramType program_number, BlastHitSavingOptions **options, Boolean gapped_calculation)Allocate memory for BlastHitSavingOptions.Definition: blast_options.c:1428
BLAST_FillEffectiveLengthsOptionsInt2 BLAST_FillEffectiveLengthsOptions(BlastEffectiveLengthsOptions *options, Int4 dbseq_num, Int8 db_length, Int8 *searchsp_eff, Int4 num_searchsp)Fill the non-default values in the BlastEffectiveLengthsOptions structure.Definition: blast_options.c:1038
SSegOptionsNewInt2 SSegOptionsNew(SSegOptions **seg_options)Allocates memory for SSegOptions, fills in defaults.Definition: blast_options.c:77
SWindowMaskerOptionsFreeSWindowMaskerOptions * SWindowMaskerOptionsFree(SWindowMaskerOptions *winmask_options)Frees SWindowMaskerOptions.Definition: blast_options.c:104
BlastEffectiveLengthsOptionsFreeBlastEffectiveLengthsOptions * BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions *options)Deallocate memory for BlastEffectiveLengthsOptions*.Definition: blast_options.c:990
SBlastFilterOptionsFreeSBlastFilterOptions * SBlastFilterOptionsFree(SBlastFilterOptions *filter_options)Frees SBlastFilterOptions and all subservient structures.Definition: blast_options.c:204
SWindowMaskerOptionsNewInt2 SWindowMaskerOptionsNew(SWindowMaskerOptions **winmask_options)Allocates memory for SWindowMaskerOptions, fills in defaults.Definition: blast_options.c:90
BlastScoringOptionsSetMatrixInt2 BlastScoringOptionsSetMatrix(BlastScoringOptions *opts, const char *matrix_name)Resets matrix name option.Definition: blast_options.c:974
s_MergeDustOptionsstatic SDustOptions * s_MergeDustOptions(const SDustOptions *opt1, const SDustOptions *opt2)Merges together two sets of dust options, choosing the most non-default one.Definition: blast_options.c:252
SBlastFilterOptionsMergeInt2 SBlastFilterOptionsMerge(SBlastFilterOptions **combined, const SBlastFilterOptions *opt1, const SBlastFilterOptions *opt2)Merges two sets of options together, taking the non-default one as preferred.Definition: blast_options.c:390
BlastExtensionOptionsFreeBlastExtensionOptions * BlastExtensionOptionsFree(BlastExtensionOptions *options)Deallocate memory for BlastExtensionOptions.Definition: blast_options.c:650
BLAST_FillInitialWordOptionsInt2 BLAST_FillInitialWordOptions(BlastInitialWordOptions *options, EBlastProgramType program, Int4 window_size, double xdrop_ungapped)Fill non-default values in the BlastInitialWordOptions structure.Definition: blast_options.c:634
SDustOptionsNewInt2 SDustOptionsNew(SDustOptions **dust_options)Allocates memory for SDustOptions, fills in defaults.Definition: blast_options.c:57
BlastInitialWordOptionsValidateInt2 BlastInitialWordOptionsValidate(EBlastProgramType program_number, const BlastInitialWordOptions *options, Blast_Message **blast_msg)Validate correctness of the initial word options.Definition: blast_options.c:601
BLAST_FillExtensionOptionsInt2 BLAST_FillExtensionOptions(BlastExtensionOptions *options, EBlastProgramType program, Int4 greedy, double x_dropoff, double x_dropoff_final)Fill non-default values in the BlastExtensionOptions structure.Definition: blast_options.c:699
LookupTableOptionsValidateInt2 LookupTableOptionsValidate(EBlastProgramType program_number, const LookupTableOptions *options, Blast_Message **blast_msg)Validate LookupTableOptions.Definition: blast_options.c:1284
BlastHitSavingOptionsValidateInt2 BlastHitSavingOptionsValidate(EBlastProgramType program_number, const BlastHitSavingOptions *options, Blast_Message **blast_msg)Validate BlastHitSavingOptions.Definition: blast_options.c:1489
kPSSM_NoImpalaScalingconst double kPSSM_NoImpalaScalingValue used to indicate that no IMPALA-style scaling should be performed when scaling a PSSM.Definition: blast_options.c:43
BlastHSPCullingOptionsFreeBlastHSPCullingOptions * BlastHSPCullingOptionsFree(BlastHSPCullingOptions *culling_opts)Deallocates culling options structure.Definition: blast_options.c:1862
BLAST_FillHitSavingOptionsInt2 BLAST_FillHitSavingOptions(BlastHitSavingOptions *options, double evalue, Int4 hitlist_size, Boolean is_gapped, Int4 culling_limit, Int4 min_diag_separation)Allocate memory for BlastHitSavingOptions.Definition: blast_options.c:1466
SSegOptionsFreeSSegOptions * SSegOptionsFree(SSegOptions *seg_options)Frees SSegOptions.Definition: blast_options.c:70
BlastHSPCullingOptionsValidateInt2 BlastHSPCullingOptionsValidate(const BlastHSPFilteringOptions *opts)Validate culling options.Definition: blast_options.c:1848
BlastHSPSubjectBestHitOptionsValidateInt2 BlastHSPSubjectBestHitOptionsValidate(const BlastHSPFilteringOptions *opts)Validate subject besthit options.Definition: blast_options.c:1961
s_DiscWordOptionsValidatestatic Boolean s_DiscWordOptionsValidate(Int4 word_size, Uint1 template_length, Uint1 template_type, Blast_Message **blast_msg)Validate options for the discontiguous word megablast Word size must be 11 or 12; template length 16,...Definition: blast_options.c:1248
BlastHSPFilteringOptionsNewBlastHSPFilteringOptions * BlastHSPFilteringOptionsNew()Allocate and initialize a BlastHSPFilteringOptions structure.Definition: blast_options.c:1872
LookupTableOptionsNewInt2 LookupTableOptionsNew(EBlastProgramType program_number, LookupTableOptions **options)Allocate memory for lookup table options and fill with default values.Definition: blast_options.c:1077
BlastDatabaseOptionsFreeBlastDatabaseOptions * BlastDatabaseOptionsFree(BlastDatabaseOptions *db_options)Deallocate database options.Definition: blast_options.c:1612
kDustLevelconst int kDustLevelDeclared in blast_def.h as extern const.Definition: blast_options.c:46
BlastHSPSubjectBestHitOptionsNewBlastHSPSubjectBestHitOptions * BlastHSPSubjectBestHitOptionsNew(Boolean isProtein)Allocate a new object for subject besthit options.Definition: blast_options.c:1947
kDustWindowconst int kDustWindowWindow parameter used by dust.Definition: blast_options.c:47
BLAST_FillLookupTableOptionsInt2 BLAST_FillLookupTableOptions(LookupTableOptions *options, EBlastProgramType program_number, Boolean is_megablast, double threshold, Int4 word_size)Allocate memory for lookup table options and fill with default values.Definition: blast_options.c:1129
SReadQualityOptionsNewInt2 SReadQualityOptionsNew(SReadQualityOptions **read_quality_options)Allocates memory for SReadQualityOptions, fills in defaults.Definition: blast_options.c:187
BlastHSPFilteringOptions_AddSubjectBestHitInt2 BlastHSPFilteringOptions_AddSubjectBestHit(BlastHSPFilteringOptions *filt_opts, BlastHSPSubjectBestHitOptions **subject_besthit)Definition: blast_options.c:1982
s_MergeWindowMaskerOptionsstatic SWindowMaskerOptions * s_MergeWindowMaskerOptions(const SWindowMaskerOptions *opt1, const SWindowMaskerOptions *opt2)Merges together two sets of window masker options, choosing the most non-default one.Definition: blast_options.c:357
BlastHSPBestHitOptionsNewBlastHSPBestHitOptions * BlastHSPBestHitOptionsNew(double overhang, double score_edge)Allocate and initialize a BlastHSPBestHitOptions structure.Definition: blast_options.c:1798
SBlastFilterOptionsNewInt2 SBlastFilterOptionsNew(SBlastFilterOptions **filter_options, EFilterOptions type)Allocates memory for SBlastFilterOptions and.Definition: blast_options.c:224
s_BlastExtensionScoringOptionsValidatestatic Int2 s_BlastExtensionScoringOptionsValidate(EBlastProgramType program_number, const BlastExtensionOptions *ext_options, const BlastScoringOptions *score_options, Blast_Message **blast_msg)Checks that the extension and scoring options are consistent with each other.Definition: blast_options.c:1675
BlastEffectiveLengthsOptions_IsSearchSpaceSetBoolean BlastEffectiveLengthsOptions_IsSearchSpaceSet(const BlastEffectiveLengthsOptions *options)Return true if the search space is set for any of the queries in the search.Definition: blast_options.c:1020
PSIBlastOptionsNewInt2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)Initialize default options for PSI BLAST.Definition: blast_options.c:1538
s_MergeSegOptionsstatic SSegOptions * s_MergeSegOptions(const SSegOptions *opt1, const SSegOptions *opt2)Merges together two sets of SEG options, choosing the most non-default one.Definition: blast_options.c:290
BlastHSPFilteringOptions_AddBestHitInt2 BlastHSPFilteringOptions_AddBestHit(BlastHSPFilteringOptions *filt_opts, BlastHSPBestHitOptions **best_hit, EBlastStage stage)Add the best hit options.Definition: blast_options.c:1879
s_MergeRepeatOptionsstatic SRepeatFilterOptions * s_MergeRepeatOptions(const SRepeatFilterOptions *opt1, const SRepeatFilterOptions *opt2)Merges together two sets of repeat filter options, choosing the most non-default one.Definition: blast_options.c:326
BlastInitialWordOptionsNewInt2 BlastInitialWordOptionsNew(EBlastProgramType program, BlastInitialWordOptions **options)Allocate memory for BlastInitialWordOptions and fill with default values.Definition: blast_options.c:573
BlastScoringOptionsFreeBlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)Deallocate memory for BlastScoringOptions.Definition: blast_options.c:774
BlastExtensionOptionsValidateInt2 BlastExtensionOptionsValidate(EBlastProgramType program_number, const BlastExtensionOptions *options, Blast_Message **blast_msg)Validate contents of BlastExtensionOptions.Definition: blast_options.c:740
BlastHSPBestHitOptionsFreeBlastHSPBestHitOptions * BlastHSPBestHitOptionsFree(BlastHSPBestHitOptions *opt)Deallocate a BlastHSPBestHitOptions structure.Definition: blast_options.c:1830
LookupTableOptionsFreeLookupTableOptions * LookupTableOptionsFree(LookupTableOptions *options)Deallocates memory for LookupTableOptions*.Definition: blast_options.c:1063
BlastScoringOptionsDupInt2 BlastScoringOptionsDup(BlastScoringOptions **new_opt, const BlastScoringOptions *old_opt)Produces copy of "old" options, with new memory allocated.Definition: blast_options.c:956
BlastQuerySetUpOptionsFreeQuerySetUpOptions * BlastQuerySetUpOptionsFree(QuerySetUpOptions *options)Deallocate memory for QuerySetUpOptions.Definition: blast_options.c:501
BlastHSPFilteringOptionsValidateInt2 BlastHSPFilteringOptionsValidate(const BlastHSPFilteringOptions *opts)Validates the BlastHSPFilteringOptions structure.Definition: blast_options.c:1911
BlastExtensionOptionsNewInt2 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions **options, Boolean gapped)Allocate memory for BlastExtensionOptions and fill with default values.Definition: blast_options.c:660
blast_options.hThe structures and functions in blast_options.
PSI_INCLUSION_ETHRESH#define PSI_INCLUSION_ETHRESHDefaults for PSI-BLAST and DELTA-BLAST options.Definition: blast_options.h:163
BLAST_GAP_X_DROPOFF_NUCL#define BLAST_GAP_X_DROPOFF_NUCLdefault dropoff for non-greedy nucleotide gapped extensionsDefinition: blast_options.h:130
BLAST_HITLIST_SIZE#define BLAST_HITLIST_SIZENumber of database sequences to save hits for.Definition: blast_options.h:160
BLAST_GAP_OPEN_MEGABLAST#define BLAST_GAP_OPEN_MEGABLASTdefault gap open penalty (megablast with greedy gapped alignment)Definition: blast_options.h:87
BLAST_UNGAPPED_X_DROPOFF_NUCL#define BLAST_UNGAPPED_X_DROPOFF_NUCLungapped dropoff score for blastn (and megablast)Definition: blast_options.h:124
BLAST_WORD_THRESHOLD_BLASTX#define BLAST_WORD_THRESHOLD_BLASTXdefault threshold (blastx)Definition: blast_options.h:112
BLAST_UNGAPPED_X_DROPOFF_PROT#define BLAST_UNGAPPED_X_DROPOFF_PROTdefault dropoff for ungapped extension; ungapped extensions will stop when the score for the extensio...Definition: blast_options.h:122
BLAST_WORDSIZE_MAPPER#define BLAST_WORDSIZE_MAPPERdefault word size for mapping rna-seq to a genomeDefinition: blast_options.h:73
BLAST_GAP_EXTN_MEGABLAST#define BLAST_GAP_EXTN_MEGABLASTdefault gap extension penalty (megablast with greedy gapped alignment)Definition: blast_options.h:95
DEFAULT_SUBJECT_BESTHIT_PROT_MAX_RANGE_DIFF#define DEFAULT_SUBJECT_BESTHIT_PROT_MAX_RANGE_DIFFDefinition: blast_options.h:1209
eJumperWithTraceback@ eJumperWithTracebackJumper extension (mapping)Definition: blast_options.h:313
eDynProgScoreOnly@ eDynProgScoreOnlystandard affine gappingDefinition: blast_options.h:311
eGreedyScoreOnly@ eGreedyScoreOnlyGreedy extension (megaBlast)Definition: blast_options.h:312
eSmithWatermanScoreOnly@ eSmithWatermanScoreOnlyScore-only smith-waterman.Definition: blast_options.h:314
BLAST_GAP_OPEN_PROT#define BLAST_GAP_OPEN_PROTProtein gap costs are the defaults for the BLOSUM62 scoring matrix.Definition: blast_options.h:84
BLAST_WORDSIZE_MEGABLAST#define BLAST_WORDSIZE_MEGABLASTdefault word size (contiguous megablast; for discontig megablast the word size is explicitly overridd...Definition: blast_options.h:68
DEFAULT_SUBJECT_BESTHIT_NUCL_MAX_RANGE_DIFF#define DEFAULT_SUBJECT_BESTHIT_NUCL_MAX_RANGE_DIFFDefinition: blast_options.h:1210
BLAST_GAP_OPEN_NUCL#define BLAST_GAP_OPEN_NUCLdefault gap open penalty (blastn)Definition: blast_options.h:86
BLAST_GAP_TRIGGER_NUCL#define BLAST_GAP_TRIGGER_NUCLdefault bit score that will trigger a gapped extension for blastnDefinition: blast_options.h:140
BLAST_GAP_EXTN_PROT#define BLAST_GAP_EXTN_PROTcost to extend a gap.Definition: blast_options.h:92
BLAST_GAP_X_DROPOFF_FINAL_NUCL#define BLAST_GAP_X_DROPOFF_FINAL_NUCLdefault dropoff for nucleotide gapped extensionsDefinition: blast_options.h:146
MAX_DB_WORD_COUNT_MAPPER#define MAX_DB_WORD_COUNT_MAPPERDefault max frequency for a database word.Definition: blast_options.h:174
BLAST_DEFAULT_MATRIX#define BLAST_DEFAULT_MATRIXDefault matrix name: BLOSUM62.Definition: blast_options.h:77
BLAST_EXPECT_VALUE#define BLAST_EXPECT_VALUEDefault parameters for saving hits.Definition: blast_options.h:158
BLAST_SCAN_RANGE_NUCL#define BLAST_SCAN_RANGE_NUCLdefault scan range (blastn)Definition: blast_options.h:63
BLAST_WINDOW_SIZE_NUCL#define BLAST_WINDOW_SIZE_NUCLdefault window size (blastn)Definition: blast_options.h:58
BLAST_GAP_X_DROPOFF_GREEDY#define BLAST_GAP_X_DROPOFF_GREEDYdefault dropoff for greedy nucleotide gapped extensionsDefinition: blast_options.h:132
BLAST_WORD_THRESHOLD_TBLASTX#define BLAST_WORD_THRESHOLD_TBLASTXdefault threshold (tblastx)Definition: blast_options.h:115
BLAST_WORD_THRESHOLD_BLASTP#define BLAST_WORD_THRESHOLD_BLASTPneighboring word score thresholds; a threshold of zero means that only query and subject words that m...Definition: blast_options.h:104
BLAST_GAP_EXTN_NUCL#define BLAST_GAP_EXTN_NUCLdefault gap extension penalty (blastn)Definition: blast_options.h:94
BLAST_GAP_TRIGGER_PROT#define BLAST_GAP_TRIGGER_PROTdefault bit score that will trigger gapped extensionDefinition: blast_options.h:137
BLAST_GAP_X_DROPOFF_PROT#define BLAST_GAP_X_DROPOFF_PROTdefault dropoff for preliminary gapped extensionsDefinition: blast_options.h:128
PSI_PSEUDO_COUNT_CONST#define PSI_PSEUDO_COUNT_CONSTPseudo-count constant for PSI-BLAST.Definition: blast_options.h:164
BLAST_REWARD#define BLAST_REWARDdefault nucleotide match scoreDefinition: blast_options.h:152
eGreedyTbck@ eGreedyTbckGreedy extension (megaBlast)Definition: blast_options.h:322
eDynProgTbck@ eDynProgTbckstandard affine gappingDefinition: blast_options.h:321
eSmithWatermanTbckFull@ eSmithWatermanTbckFullSmith-waterman to find all alignments.Definition: blast_options.h:325
BLAST_PENALTY#define BLAST_PENALTYdefault reward and penalty (only applies to blastn/megablast)Definition: blast_options.h:151
BLAST_WINDOW_SIZE_PROT#define BLAST_WINDOW_SIZE_PROTSome default values (used when creating blast options block and for command-line program defaults.Definition: blast_options.h:57
EFilterOptionsEFilterOptionsTypes of filtering options.Definition: blast_options.h:675
eDustRepeats@ eDustRepeatsRepeat and dust filtering for nucleotides.Definition: blast_options.h:679
eRepeats@ eRepeatsRepeat filtering for nucleotides.Definition: blast_options.h:678
eDust@ eDustlow-complexity for nucleotides.Definition: blast_options.h:677
eEmpty@ eEmptyno filtering at all.Definition: blast_options.h:680
eSeg@ eSeglow-complexity for proteins.Definition: blast_options.h:676
BLAST_GAP_X_DROPOFF_FINAL_PROT#define BLAST_GAP_X_DROPOFF_FINAL_PROTdefault dropoff for the final gapped extension with tracebackDefinition: blast_options.h:144
BLAST_GENETIC_CODE#define BLAST_GENETIC_CODEDefault genetic code for query and/or database.Definition: blast_options.h:168
BLAST_WORD_THRESHOLD_TBLASTN#define BLAST_WORD_THRESHOLD_TBLASTNdefault neighboring threshold (tblastn/rpstblastn)Definition: blast_options.h:113
BLAST_WORDSIZE_PROT#define BLAST_WORDSIZE_PROTlength of word to trigger an extension.Definition: blast_options.h:66
BLAST_WORDSIZE_NUCL#define BLAST_WORDSIZE_NUCLdefault word size (blastn)Definition: blast_options.h:67
kDefaultRepeatFilterDb#define kDefaultRepeatFilterDbDefault value for repeats database filtering.Definition: blast_options.h:246
eNaLookupTable@ eNaLookupTableblastn lookup tableDefinition: blast_options.h:191
eMBLookupTable@ eMBLookupTablemegablast lookup table (includes both contiguous and discontiguous megablast)Definition: blast_options.h:188
ePhiNaLookupTable@ ePhiNaLookupTablenucleotide lookup table for phi-blastDefinition: blast_options.h:195
eAaLookupTable@ eAaLookupTablestandard protein (blastp) lookup tableDefinition: blast_options.h:192
eCompressedAaLookupTable@ eCompressedAaLookupTablecompressed alphabet (blastp) lookup tableDefinition: blast_options.h:193
ePhiLookupTable@ ePhiLookupTableprotein lookup table specialized for phi-blastDefinition: blast_options.h:194
eRPSLookupTable@ eRPSLookupTableRPS lookup table (rpsblast and rpstblastn)Definition: blast_options.h:196
eNaHashLookupTable@ eNaHashLookupTableused for 16-base wordsDefinition: blast_options.h:200
Blast_ProgramIsMappingBoolean Blast_ProgramIsMapping(EBlastProgramType p)Definition: blast_program.c:76
Blast_QueryIsPssmBoolean Blast_QueryIsPssm(EBlastProgramType p)Returns true if the query is PSSM.Definition: blast_program.c:46
Blast_ProgramIsPhiBlastBoolean Blast_ProgramIsPhiBlast(EBlastProgramType p)Returns true if program is PHI-BLAST (i.e.Definition: blast_program.c:70
Blast_QueryIsTranslatedBoolean Blast_QueryIsTranslated(EBlastProgramType p)Returns true if the query is translated.Definition: blast_program.c:60
Blast_ProgramIsNucleotideBoolean Blast_ProgramIsNucleotide(EBlastProgramType p)Definition: blast_program.c:82
Blast_QueryIsPatternBoolean Blast_QueryIsPattern(EBlastProgramType p)Definition: blast_program.c:79
Blast_ProgramIsRpsBlastBoolean Blast_ProgramIsRpsBlast(EBlastProgramType p)Returns true if program is RPS-BLAST (i.e.Definition: blast_program.c:73
EBlastProgramTypeEBlastProgramTypeDefines the engine's notion of the different applications of the BLAST algorithm.Definition: blast_program.h:72
eBlastTypeBlastn@ eBlastTypeBlastnDefinition: blast_program.h:74
eBlastTypeBlastx@ eBlastTypeBlastxDefinition: blast_program.h:75
eBlastTypePsiTblastn@ eBlastTypePsiTblastnDefinition: blast_program.h:83
eBlastTypeRpsTblastn@ eBlastTypeRpsTblastnDefinition: blast_program.h:85
eBlastTypePhiBlastn@ eBlastTypePhiBlastnDefinition: blast_program.h:87
eBlastTypeMapping@ eBlastTypeMappingDefinition: blast_program.h:88
eBlastTypeTblastx@ eBlastTypeTblastxDefinition: blast_program.h:79
eBlastTypePsiBlast@ eBlastTypePsiBlastDefinition: blast_program.h:82
eBlastTypePhiBlastp@ eBlastTypePhiBlastpDefinition: blast_program.h:86
eBlastTypeRpsBlast@ eBlastTypeRpsBlastDefinition: blast_program.h:84
eBlastTypeTblastn@ eBlastTypeTblastnDefinition: blast_program.h:77
eBlastTypeBlastp@ eBlastTypeBlastpDefinition: blast_program.h:73
Blast_SubjectIsTranslatedBoolean Blast_SubjectIsTranslated(EBlastProgramType p)Returns true if the subject is translated.Definition: blast_program.c:63
blast_stat.hDefinitions and prototypes used by blast_stat.c to calculate BLAST statistics.
BLAST_PrintAllowedValueschar * BLAST_PrintAllowedValues(const char *matrix, Int4 gap_open, Int4 gap_extend)Prints a message about the allowed open etc values for the given matrix, BlastKarlinBlkGappedFill sh...Definition: blast_stat.c:3786
Blast_KarlinBlkGappedLoadFromTablesInt2 Blast_KarlinBlkGappedLoadFromTables(Blast_KarlinBlk *kbp, Int4 gap_open, Int4 gap_extend, const char *matrix_name, Boolean standard_only)Attempts to fill KarlinBlk for given gap opening, extensions etc.Definition: blast_stat.c:3576
BLAST_PrintMatrixMessagechar * BLAST_PrintMatrixMessage(const char *matrix, Boolean standard_only)Prints a message about the allowed matrices, BlastKarlinBlkGappedFill should return 1 before this is...Definition: blast_stat.c:3759
BLAST_CheckRewardPenaltyScoresBoolean BLAST_CheckRewardPenaltyScores(Int4 reward, Int4 penalty)Check the validity of the reward and penalty scores.Definition: blast_stat.c:3453
blast_util.hVarious auxiliary BLAST utility functions.
BLAST_StrToUpperchar * BLAST_StrToUpper(const char *string)Returns a copy of the input string with all its characters turned to uppercase.Definition: blast_util.c:1352
composition_constants.hConstants used in compositional score matrix adjustment.
eCompositionBasedStats@ eCompositionBasedStatsComposition-based statistics as in NAR 29:2994-3005, 2001.Definition: composition_constants.h:63
eNoCompositionBasedStats@ eNoCompositionBasedStatsDon't use composition based statistics.Definition: composition_constants.h:61
window_sizestatic ulg window_sizeDefinition: ct_nlmzip_deflate.cpp:164
NULL#define NULLDefinition: ncbistd.hpp:225
Uint1uint8_t Uint11-byte (8-bit) unsigned integerDefinition: ncbitype.h:99
Int2int16_t Int22-byte (16-bit) signed integerDefinition: ncbitype.h:100
Int4int32_t Int44-byte (32-bit) signed integerDefinition: ncbitype.h:102
Uint4uint32_t Uint44-byte (32-bit) unsigned integerDefinition: ncbitype.h:103
Int8int64_t Int88-byte (64-bit) signed integerDefinition: ncbitype.h:104
hspfilter_besthit.hImplementation of a number of BlastHSPWriters to save hits from a BLAST search, and subsequently retu...
kBestHit_OverhangMax#define kBestHit_OverhangMaxMaximum value for overhang.Definition: hspfilter_besthit.h:84
kBestHit_OverhangMin#define kBestHit_OverhangMinMinimum value for overhang.Definition: hspfilter_besthit.h:82
kBestHit_ScoreEdgeMin#define kBestHit_ScoreEdgeMinMinimum value for score_edge.Definition: hspfilter_besthit.h:89
kBestHit_ScoreEdgeMax#define kBestHit_ScoreEdgeMaxMaximum value for score_edge.Definition: hspfilter_besthit.h:91
hspfilter_collector.hImplementation of a number of BlastHSPWriters to save hits from a BLAST search, and subsequently retu...
iint iDefinition: lex.newick.cpp:1456
ifif(yy_accept[yy_current_state])Definition: lex.newick.cpp:1081
util::strcmpint strcmp(const char *str1, const char *str2)Definition: odbc_utils.hpp:160
strdup#define strdupDefinition: ncbi_ansi_ext.h:70
strcasecmp#define strcasecmpDefinition: ncbi_ansi_ext.h:110
INT4_MAX#define INT4_MAXlargest number represented by signed intDefinition: ncbi_std.h:141
BlastMemDupvoid * BlastMemDup(const void *orig, size_t size)Copies memory using memcpy and malloc.Definition: ncbi_std.c:35
BooleanUint1 Booleanbool replacement for CDefinition: ncbi_std.h:94
INT2_MAX#define INT2_MAXlargest number represented by signed (two byte) shortDefinition: ncbi_std.h:156
ASSERT#define ASSERTmacro for assert.Definition: ncbi_std.h:107
MAX#define MAX(a, b)returns larger of a and b.Definition: ncbi_std.h:117
toupperint toupper(Uchar c)Definition: ncbictype.hpp:73
maxT max(T x_, T y_)Definition: njn_function.hpp:105
bufferstatic pcre_uint8 * bufferDefinition: pcretest.c:1051
BlastDatabaseOptionsOptions used to create the ReadDBFILE structure Include database name and various information for res...Definition: blast_options.h:558
BlastDatabaseOptions::genetic_codeInt4 genetic_codeGenetic code to use for translation, tblast[nx] only.Definition: blast_options.h:559
BlastEffectiveLengthsOptionsOptions for setting up effective lengths and search spaces.Definition: blast_options.h:482
BlastEffectiveLengthsOptions::searchsp_effInt8 * searchsp_effSearch space to be used for statistical calculations (one such per query context)Definition: blast_options.h:489
BlastEffectiveLengthsOptions::db_lengthInt8 db_lengthDatabase length to be used for statistical calculations.Definition: blast_options.h:483
BlastEffectiveLengthsOptions::dbseq_numInt4 dbseq_numNumber of database sequences to be used for statistical calculations.Definition: blast_options.h:485
BlastEffectiveLengthsOptions::num_searchspacesInt4 num_searchspacesNumber of elements in searchsp_eff, this must be equal to the number of contexts in the search.Definition: blast_options.h:487
BlastExtensionOptionsOptions used for gapped extension These include: a.Definition: blast_options.h:335
BlastExtensionOptions::eTbackExtEBlastTbackExt eTbackExttype of traceback extension.Definition: blast_options.h:341
BlastExtensionOptions::ePrelimGapExtEBlastPrelimGapExt ePrelimGapExttype of preliminary gapped extension (normally) for calculating score.Definition: blast_options.h:339
BlastExtensionOptions::gap_x_dropoff_finaldouble gap_x_dropoff_finalX-dropoff value for the final gapped extension (in bits)Definition: blast_options.h:337
BlastExtensionOptions::gap_x_dropoffdouble gap_x_dropoffX-dropoff value for gapped extension (in bits)Definition: blast_options.h:336
BlastExtensionOptions::compositionBasedStatsInt4 compositionBasedStatsmode of compositional adjustment to use; if zero then compositional adjustment is not usedDefinition: blast_options.h:342
BlastHSPBestHitOptionsOptions for the Best Hit HSP collection algorithm.Definition: blast_options.h:355
BlastHSPBestHitOptions::score_edgedouble score_edgeDefinition: blast_options.h:357
BlastHSPBestHitOptions::overhangdouble overhangDefinition: blast_options.h:356
BlastHSPCullingOptionsOptions for the HSP culling algorithm.Definition: blast_options.h:361
BlastHSPCullingOptions::max_hitsint max_hitsMaximum number of hits per area of query.Definition: blast_options.h:362
BlastHSPFilteringOptionsStructure containing the HSP filtering/writing options.Definition: blast_options.h:370
BlastHSPFilteringOptions::culling_stageEBlastStage culling_stageDefinition: blast_options.h:377
BlastHSPFilteringOptions::best_hitBlastHSPBestHitOptions * best_hitBest Hit algorithm.Definition: blast_options.h:372
BlastHSPFilteringOptions::subject_besthit_optsBlastHSPSubjectBestHitOptions * subject_besthit_optsSubject Culling.Definition: blast_options.h:380
BlastHSPFilteringOptions::best_hit_stageEBlastStage best_hit_stageDefinition: blast_options.h:373
BlastHSPFilteringOptions::culling_optsBlastHSPCullingOptions * culling_optsculling algorithmDefinition: blast_options.h:376
BlastHSPSubjectBestHitOptionsDefinition: blast_options.h:365
BlastHSPSubjectBestHitOptions::max_range_diffunsigned int max_range_diffDefinition: blast_options.h:366
BlastHitSavingOptionsOptions used when evaluating and saving hits These include: a.Definition: blast_options.h:389
BlastHitSavingOptions::culling_limitInt4 culling_limitIf the query range of an HSP is contained in at least this many higher-scoring HSPs,...Definition: blast_options.h:405
BlastHitSavingOptions::longest_intronInt4 longest_intronThe longest distance between HSPs allowed for combining via sum statistics with uneven gaps.Definition: blast_options.h:417
BlastHitSavingOptions::expect_valuedouble expect_valueThe expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.Definition: blast_options.h:390
BlastHitSavingOptions::cutoff_scoreInt4 cutoff_scoreThe (raw) score cut-off threshold.Definition: blast_options.h:392
BlastHitSavingOptions::hitlist_sizeInt4 hitlist_sizeMaximal number of database sequences to return results for.Definition: blast_options.h:400
BlastHitSavingOptions::min_diag_separationInt4 min_diag_separationHow many diagonals separate a hit from a substantial alignment before it's not blocked out.Definition: blast_options.h:423
BlastHitSavingOptions::max_edit_distanceInt4 max_edit_distanceMaximum number of mismatches and gaps.Definition: blast_options.h:398
BlastHitSavingOptions::hsp_filt_optBlastHSPFilteringOptions * hsp_filt_optContains options to configure the HSP filtering/writing structures. If not set, the default HSP filt...Definition: blast_options.h:430
BlastInitialWordOptionsOptions needed for initial word finding and processing.Definition: blast_options.h:297
BlastInitialWordOptions::x_dropoffdouble x_dropoffX-dropoff value (in bits) for the ungapped extension.Definition: blast_options.h:302
BlastInitialWordOptions::window_sizeInt4 window_sizeMaximal allowed distance between 2 hits in case 2 hits are required to trigger the extension.Definition: blast_options.h:299
BlastInitialWordOptions::scan_rangeInt4 scan_rangeMaximal number of gaps allowed between 2 hits.Definition: blast_options.h:301
BlastScoringOptionsScoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...Definition: blast_options.h:459
BlastScoringOptions::penaltyInt2 penaltyPenalty for a mismatch.Definition: blast_options.h:464
BlastScoringOptions::program_numberEBlastProgramType program_numberindicates blastn, blastp, etc.Definition: blast_options.h:476
BlastScoringOptions::gap_openInt4 gap_openExtra penalty for starting a gap.Definition: blast_options.h:468
BlastScoringOptions::gap_extendInt4 gap_extendPenalty for each gap residue.Definition: blast_options.h:469
BlastScoringOptions::rewardInt2 rewardReward for a match.Definition: blast_options.h:463
BlastScoringOptions::gapped_calculationBoolean gapped_calculationgap-free search if FALSEDefinition: blast_options.h:465
BlastScoringOptions::matrix_pathchar * matrix_pathDirectory path to where matrices are stored.Definition: blast_options.h:462
BlastScoringOptions::matrixchar * matrixName of the matrix containing all scores: needed for finding neighboring words.Definition: blast_options.h:460
BlastScoringOptions::is_ooframeBoolean is_ooframeShould out-of-frame gapping be used in a translated search?Definition: blast_options.h:472
Blast_MessageStructure to hold a message from the core of the BLAST engine.Definition: blast_message.h:70
LookupTableOptionsOptions needed to construct a lookup table Also needed: query sequence and query length.Definition: blast_options.h:206
LookupTableOptions::word_sizeInt4 word_sizeDetermines the size of the lookup table.Definition: blast_options.h:211
LookupTableOptions::phi_patternchar * phi_patternPHI-BLAST pattern.Definition: blast_options.h:214
LookupTableOptions::max_db_word_countUint1 max_db_word_countwords with larger frequency in the database will be masked in the lookup table, if the db_filter opto...Definition: blast_options.h:221
LookupTableOptions::db_filterBoolean db_filterscan the database and include only words that appear in the database between 1 and 9 times (currently...Definition: blast_options.h:217
LookupTableOptions::thresholddouble thresholdScore threshold for putting words in a lookup table (fractional values are allowed,...Definition: blast_options.h:207
LookupTableOptions::mb_template_typeInt4 mb_template_typeType of a discontiguous word template.Definition: blast_options.h:213
LookupTableOptions::lut_typeELookupTableType lut_typeWhat kind of lookup table to construct?Definition: blast_options.h:210
LookupTableOptions::mb_template_lengthInt4 mb_template_lengthLength of the discontiguous words.Definition: blast_options.h:212
PSIBlastOptionsOptions used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...Definition: blast_options.h:496
PSIBlastOptions::nsg_compatibility_modeBoolean nsg_compatibility_modeCompatibility option for the NCBI's structure group (note nsg_ prefix, stands for NCBI's structure gr...Definition: blast_options.h:535
PSIBlastOptions::impala_scaling_factordouble impala_scaling_factorScaling factor as used in IMPALA to do the matrix rescaling.Definition: blast_options.h:543
PSIBlastOptions::inclusion_ethreshdouble inclusion_ethreshMinimum evalue for inclusion in PSSM calculation.Definition: blast_options.h:508
PSIBlastOptions::ignore_unaligned_positionsBoolean ignore_unaligned_positionsThis turns off a validation for the multiple sequence alignment in the PSSM engine for unaligned posi...Definition: blast_options.h:549
PSIBlastOptions::pseudo_countInt4 pseudo_countPseudocount constant.Definition: blast_options.h:499
PSIBlastOptions::use_best_alignmentBoolean use_best_alignmentIf set to TRUE, use the best alignment when multiple HSPs are found in a query-subject alignment (i....Definition: blast_options.h:517
QuerySetUpOptionsOptions required for setting up the query sequence.Definition: blast_options.h:285
QuerySetUpOptions::strand_optionUint1 strand_optionIn blastn: which strand to search: 1 = forward; 2 = reverse; 3 = both.Definition: blast_options.h:290
QuerySetUpOptions::filter_stringchar * filter_stringDEPRECATED, filtering options above.Definition: blast_options.h:288
QuerySetUpOptions::filtering_optionsSBlastFilterOptions * filtering_optionsstructured options for all filtering offered from algo/blast/core for BLAST.Definition: blast_options.h:286
SBlastFilterOptionsAll filtering options.Definition: blast_options.h:272
SBlastFilterOptions::repeatFilterOptionsSRepeatFilterOptions * repeatFilterOptionsfor organism specific repeat filtering.Definition: blast_options.h:277
SBlastFilterOptions::segOptionsSSegOptions * segOptionslow-complexity filtering for protein sequences (includes translated nucleotides).Definition: blast_options.h:275
SBlastFilterOptions::readQualityOptionsSReadQualityOptions * readQualityOptionsquality filtering for mapping next-generation sequencesDefinition: blast_options.h:280
SBlastFilterOptions::mask_at_hashBoolean mask_at_hashmask query only for lookup table creationDefinition: blast_options.h:273
SBlastFilterOptions::windowMaskerOptionsSWindowMaskerOptions * windowMaskerOptionsorganism specific filtering with window masker.Definition: blast_options.h:278
SBlastFilterOptions::dustOptionsSDustOptions * dustOptionslow-complexity filtering for nucleotides.Definition: blast_options.h:274
SDustOptionsOptions for dust algorithm, applies only to nucl.Definition: blast_options.h:229
SDustOptions::levelint levelDefinition: blast_options.h:230
SDustOptions::linkerint linkermin distance to link segments.Definition: blast_options.h:232
SDustOptions::windowint windowDefinition: blast_options.h:231
SReadQualityOptionsFiltering options for mapping next-generation sequences.Definition: blast_options.h:266
SRepeatFilterOptionsFiltering options for organism-specific repeat filtering.Definition: blast_options.h:252
SRepeatFilterOptions::databasechar * databaseNucleotide database for mini BLAST search.Definition: blast_options.h:253
SSegOptionsOptions for SEG algorithm, applies only to protein-protein comparisons.Definition: blast_options.h:239
SSegOptions::hicutdouble hicutDefinition: blast_options.h:242
SSegOptions::windowint windowinitial window to trigger further work.Definition: blast_options.h:240
SSegOptions::locutdouble locutDefinition: blast_options.h:241
SWindowMaskerOptionsFiltering options for organism-specific filtering with Window Masker.Definition: blast_options.h:260
SWindowMaskerOptions::databaseconst char * databaseUse winmasker database at this location.Definition: blast_options.h:262
SWindowMaskerOptions::taxidint taxidSelect masking database for this TaxID.Definition: blast_options.h:261
typeDefinition: type.c:6
FALSE@ FALSEDefinition: testodbc.c:27
TRUE@ TRUEDefinition: testodbc.c:27
|